{ "best_metric": 77.3865625420762, "best_model_checkpoint": "/scratch/mrahma45/pixel/finetuned_models/bert/bert-base-finetuned-parsing-ud-Arabic-PADT/checkpoint-15000", "epoch": 78.94736842105263, "global_step": 15000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.53, "learning_rate": 7.52e-05, "loss": 4.0025, "step": 100 }, { "epoch": 1.05, "learning_rate": 7.949530201342283e-05, "loss": 1.5372, "step": 200 }, { "epoch": 1.58, "learning_rate": 7.895838926174497e-05, "loss": 1.2033, "step": 300 }, { "epoch": 2.11, "learning_rate": 7.842147651006712e-05, "loss": 1.0746, "step": 400 }, { "epoch": 2.63, "learning_rate": 7.788456375838927e-05, "loss": 0.8869, "step": 500 }, { "epoch": 2.63, "eval_las": 73.72761545711593, "eval_loss": 1.1565440893173218, "eval_runtime": 7.1881, "eval_samples_per_second": 126.459, "eval_steps_per_second": 15.86, "eval_uas": 79.98518917463309, "step": 500 }, { "epoch": 3.16, "learning_rate": 7.734765100671142e-05, "loss": 0.8237, "step": 600 }, { "epoch": 3.68, "learning_rate": 7.681073825503357e-05, "loss": 0.6923, "step": 700 }, { "epoch": 4.21, "learning_rate": 7.627382550335572e-05, "loss": 0.6289, "step": 800 }, { "epoch": 4.74, "learning_rate": 7.573691275167786e-05, "loss": 0.5416, "step": 900 }, { "epoch": 5.26, "learning_rate": 7.52e-05, "loss": 0.4842, "step": 1000 }, { "epoch": 5.26, "eval_las": 75.07405412683453, "eval_loss": 1.3640334606170654, "eval_runtime": 7.1724, "eval_samples_per_second": 126.735, "eval_steps_per_second": 15.894, "eval_uas": 81.23737713747138, "step": 1000 }, { "epoch": 5.79, "learning_rate": 7.466308724832215e-05, "loss": 0.4232, "step": 1100 }, { "epoch": 6.32, "learning_rate": 7.41261744966443e-05, "loss": 0.3678, "step": 1200 }, { "epoch": 6.84, "learning_rate": 7.358926174496644e-05, "loss": 0.3381, "step": 1300 }, { "epoch": 7.37, "learning_rate": 7.305234899328859e-05, "loss": 0.2829, "step": 1400 }, { "epoch": 7.89, "learning_rate": 7.251543624161074e-05, "loss": 0.2679, "step": 1500 }, { "epoch": 7.89, "eval_las": 75.22889457385216, "eval_loss": 1.5819050073623657, "eval_runtime": 7.1677, "eval_samples_per_second": 126.819, "eval_steps_per_second": 15.905, "eval_uas": 81.27777029756295, "step": 1500 }, { "epoch": 8.42, "learning_rate": 7.197852348993289e-05, "loss": 0.217, "step": 1600 }, { "epoch": 8.95, "learning_rate": 7.144161073825504e-05, "loss": 0.2221, "step": 1700 }, { "epoch": 9.47, "learning_rate": 7.090469798657718e-05, "loss": 0.1718, "step": 1800 }, { "epoch": 10.0, "learning_rate": 7.036778523489933e-05, "loss": 0.1889, "step": 1900 }, { "epoch": 10.53, "learning_rate": 6.983087248322148e-05, "loss": 0.1429, "step": 2000 }, { "epoch": 10.53, "eval_las": 75.5722364346304, "eval_loss": 1.906549334526062, "eval_runtime": 7.1719, "eval_samples_per_second": 126.744, "eval_steps_per_second": 15.895, "eval_uas": 81.56052241820385, "step": 2000 }, { "epoch": 11.05, "learning_rate": 6.929395973154363e-05, "loss": 0.1566, "step": 2100 }, { "epoch": 11.58, "learning_rate": 6.875704697986578e-05, "loss": 0.1271, "step": 2200 }, { "epoch": 12.11, "learning_rate": 6.822013422818793e-05, "loss": 0.1319, "step": 2300 }, { "epoch": 12.63, "learning_rate": 6.768322147651007e-05, "loss": 0.118, "step": 2400 }, { "epoch": 13.16, "learning_rate": 6.714630872483222e-05, "loss": 0.1182, "step": 2500 }, { "epoch": 13.16, "eval_las": 75.51164669449307, "eval_loss": 2.0345656871795654, "eval_runtime": 7.1813, "eval_samples_per_second": 126.579, "eval_steps_per_second": 15.875, "eval_uas": 81.62447825501549, "step": 2500 }, { "epoch": 13.68, "learning_rate": 6.660939597315437e-05, "loss": 0.1045, "step": 2600 }, { "epoch": 14.21, "learning_rate": 6.607248322147652e-05, "loss": 0.1042, "step": 2700 }, { "epoch": 14.74, "learning_rate": 6.553557046979867e-05, "loss": 0.0959, "step": 2800 }, { "epoch": 15.26, "learning_rate": 6.499865771812081e-05, "loss": 0.0923, "step": 2900 }, { "epoch": 15.79, "learning_rate": 6.446174496644296e-05, "loss": 0.0874, "step": 3000 }, { "epoch": 15.79, "eval_las": 76.13774067591221, "eval_loss": 2.1854023933410645, "eval_runtime": 7.165, "eval_samples_per_second": 126.867, "eval_steps_per_second": 15.911, "eval_uas": 82.03177595260536, "step": 3000 }, { "epoch": 16.32, "learning_rate": 6.392483221476511e-05, "loss": 0.086, "step": 3100 }, { "epoch": 16.84, "learning_rate": 6.338791946308726e-05, "loss": 0.0831, "step": 3200 }, { "epoch": 17.37, "learning_rate": 6.28510067114094e-05, "loss": 0.0752, "step": 3300 }, { "epoch": 17.89, "learning_rate": 6.231409395973154e-05, "loss": 0.0796, "step": 3400 }, { "epoch": 18.42, "learning_rate": 6.177718120805369e-05, "loss": 0.0708, "step": 3500 }, { "epoch": 18.42, "eval_las": 75.79776491180826, "eval_loss": 2.4079720973968506, "eval_runtime": 7.1633, "eval_samples_per_second": 126.897, "eval_steps_per_second": 15.915, "eval_uas": 81.9038642789821, "step": 3500 }, { "epoch": 18.95, "learning_rate": 6.124026845637584e-05, "loss": 0.0768, "step": 3600 }, { "epoch": 19.47, "learning_rate": 6.070335570469799e-05, "loss": 0.0663, "step": 3700 }, { "epoch": 20.0, "learning_rate": 6.0166442953020136e-05, "loss": 0.0687, "step": 3800 }, { "epoch": 20.53, "learning_rate": 5.962953020134229e-05, "loss": 0.0633, "step": 3900 }, { "epoch": 21.05, "learning_rate": 5.909261744966444e-05, "loss": 0.0681, "step": 4000 }, { "epoch": 21.05, "eval_las": 76.15793725595799, "eval_loss": 2.4060051441192627, "eval_runtime": 7.167, "eval_samples_per_second": 126.831, "eval_steps_per_second": 15.906, "eval_uas": 82.17315201292581, "step": 4000 }, { "epoch": 21.58, "learning_rate": 5.855570469798659e-05, "loss": 0.0586, "step": 4100 }, { "epoch": 22.11, "learning_rate": 5.8018791946308735e-05, "loss": 0.0635, "step": 4200 }, { "epoch": 22.63, "learning_rate": 5.7481879194630884e-05, "loss": 0.0563, "step": 4300 }, { "epoch": 23.16, "learning_rate": 5.694496644295303e-05, "loss": 0.0586, "step": 4400 }, { "epoch": 23.68, "learning_rate": 5.6408053691275166e-05, "loss": 0.0524, "step": 4500 }, { "epoch": 23.68, "eval_las": 75.88528342533998, "eval_loss": 2.648954153060913, "eval_runtime": 7.1677, "eval_samples_per_second": 126.819, "eval_steps_per_second": 15.905, "eval_uas": 81.84664063551905, "step": 4500 }, { "epoch": 24.21, "learning_rate": 5.5871140939597315e-05, "loss": 0.0533, "step": 4600 }, { "epoch": 24.74, "learning_rate": 5.533422818791946e-05, "loss": 0.0513, "step": 4700 }, { "epoch": 25.26, "learning_rate": 5.479731543624161e-05, "loss": 0.0538, "step": 4800 }, { "epoch": 25.79, "learning_rate": 5.426040268456376e-05, "loss": 0.0531, "step": 4900 }, { "epoch": 26.32, "learning_rate": 5.372348993288591e-05, "loss": 0.0516, "step": 5000 }, { "epoch": 26.32, "eval_las": 76.18823212602666, "eval_loss": 2.6080164909362793, "eval_runtime": 7.1678, "eval_samples_per_second": 126.817, "eval_steps_per_second": 15.904, "eval_uas": 82.22027736636596, "step": 5000 }, { "epoch": 26.84, "learning_rate": 5.3186577181208056e-05, "loss": 0.0459, "step": 5100 }, { "epoch": 27.37, "learning_rate": 5.2649664429530204e-05, "loss": 0.0466, "step": 5200 }, { "epoch": 27.89, "learning_rate": 5.211275167785235e-05, "loss": 0.0503, "step": 5300 }, { "epoch": 28.42, "learning_rate": 5.15758389261745e-05, "loss": 0.0461, "step": 5400 }, { "epoch": 28.95, "learning_rate": 5.1038926174496656e-05, "loss": 0.0452, "step": 5500 }, { "epoch": 28.95, "eval_las": 76.16803554598089, "eval_loss": 2.6569535732269287, "eval_runtime": 7.1554, "eval_samples_per_second": 127.037, "eval_steps_per_second": 15.932, "eval_uas": 82.10246398276558, "step": 5500 }, { "epoch": 29.47, "learning_rate": 5.050201342281879e-05, "loss": 0.0445, "step": 5600 }, { "epoch": 30.0, "learning_rate": 4.996510067114094e-05, "loss": 0.0416, "step": 5700 }, { "epoch": 30.53, "learning_rate": 4.942818791946309e-05, "loss": 0.0408, "step": 5800 }, { "epoch": 31.05, "learning_rate": 4.8891275167785235e-05, "loss": 0.0405, "step": 5900 }, { "epoch": 31.58, "learning_rate": 4.835436241610738e-05, "loss": 0.0398, "step": 6000 }, { "epoch": 31.58, "eval_las": 76.00982900228894, "eval_loss": 2.7657480239868164, "eval_runtime": 7.157, "eval_samples_per_second": 127.008, "eval_steps_per_second": 15.928, "eval_uas": 82.13949104618284, "step": 6000 }, { "epoch": 32.11, "learning_rate": 4.781744966442953e-05, "loss": 0.0411, "step": 6100 }, { "epoch": 32.63, "learning_rate": 4.728053691275168e-05, "loss": 0.037, "step": 6200 }, { "epoch": 33.16, "learning_rate": 4.674362416107383e-05, "loss": 0.0371, "step": 6300 }, { "epoch": 33.68, "learning_rate": 4.6206711409395976e-05, "loss": 0.0383, "step": 6400 }, { "epoch": 34.21, "learning_rate": 4.5669798657718125e-05, "loss": 0.0374, "step": 6500 }, { "epoch": 34.21, "eval_las": 76.46761814999327, "eval_loss": 2.943580389022827, "eval_runtime": 7.1627, "eval_samples_per_second": 126.908, "eval_steps_per_second": 15.916, "eval_uas": 82.3751178133836, "step": 6500 }, { "epoch": 34.74, "learning_rate": 4.513288590604027e-05, "loss": 0.0355, "step": 6600 }, { "epoch": 35.26, "learning_rate": 4.459597315436242e-05, "loss": 0.0338, "step": 6700 }, { "epoch": 35.79, "learning_rate": 4.405906040268456e-05, "loss": 0.0354, "step": 6800 }, { "epoch": 36.32, "learning_rate": 4.352214765100671e-05, "loss": 0.0328, "step": 6900 }, { "epoch": 36.84, "learning_rate": 4.298523489932886e-05, "loss": 0.0363, "step": 7000 }, { "epoch": 36.84, "eval_las": 76.36663524976437, "eval_loss": 2.909515857696533, "eval_runtime": 7.1643, "eval_samples_per_second": 126.879, "eval_steps_per_second": 15.912, "eval_uas": 82.31452807324627, "step": 7000 }, { "epoch": 37.37, "learning_rate": 4.244832214765101e-05, "loss": 0.0329, "step": 7100 }, { "epoch": 37.89, "learning_rate": 4.1911409395973156e-05, "loss": 0.0319, "step": 7200 }, { "epoch": 38.42, "learning_rate": 4.1374496644295304e-05, "loss": 0.0321, "step": 7300 }, { "epoch": 38.95, "learning_rate": 4.083758389261745e-05, "loss": 0.033, "step": 7400 }, { "epoch": 39.47, "learning_rate": 4.03006711409396e-05, "loss": 0.0312, "step": 7500 }, { "epoch": 39.47, "eval_las": 76.26901844620978, "eval_loss": 3.006049633026123, "eval_runtime": 7.1637, "eval_samples_per_second": 126.89, "eval_steps_per_second": 15.914, "eval_uas": 82.08226740271981, "step": 7500 }, { "epoch": 40.0, "learning_rate": 3.976375838926175e-05, "loss": 0.0305, "step": 7600 }, { "epoch": 40.53, "learning_rate": 3.92268456375839e-05, "loss": 0.0284, "step": 7700 }, { "epoch": 41.05, "learning_rate": 3.8689932885906045e-05, "loss": 0.0301, "step": 7800 }, { "epoch": 41.58, "learning_rate": 3.815302013422819e-05, "loss": 0.0295, "step": 7900 }, { "epoch": 42.11, "learning_rate": 3.761610738255034e-05, "loss": 0.0291, "step": 8000 }, { "epoch": 42.11, "eval_las": 76.41039450653024, "eval_loss": 3.065589189529419, "eval_runtime": 7.1599, "eval_samples_per_second": 126.957, "eval_steps_per_second": 15.922, "eval_uas": 82.25730442978322, "step": 8000 }, { "epoch": 42.63, "learning_rate": 3.707919463087249e-05, "loss": 0.0259, "step": 8100 }, { "epoch": 43.16, "learning_rate": 3.654228187919463e-05, "loss": 0.0287, "step": 8200 }, { "epoch": 43.68, "learning_rate": 3.600536912751678e-05, "loss": 0.0262, "step": 8300 }, { "epoch": 44.21, "learning_rate": 3.546845637583893e-05, "loss": 0.026, "step": 8400 }, { "epoch": 44.74, "learning_rate": 3.4931543624161076e-05, "loss": 0.027, "step": 8500 }, { "epoch": 44.74, "eval_las": 76.36326915309007, "eval_loss": 3.132659912109375, "eval_runtime": 7.1605, "eval_samples_per_second": 126.947, "eval_steps_per_second": 15.921, "eval_uas": 82.23037565638886, "step": 8500 }, { "epoch": 45.26, "learning_rate": 3.4394630872483224e-05, "loss": 0.0248, "step": 8600 }, { "epoch": 45.79, "learning_rate": 3.385771812080537e-05, "loss": 0.0245, "step": 8700 }, { "epoch": 46.32, "learning_rate": 3.332080536912752e-05, "loss": 0.025, "step": 8800 }, { "epoch": 46.84, "learning_rate": 3.278389261744967e-05, "loss": 0.0253, "step": 8900 }, { "epoch": 47.37, "learning_rate": 3.224697986577182e-05, "loss": 0.0244, "step": 9000 }, { "epoch": 47.37, "eval_las": 76.79749562407432, "eval_loss": 3.1936397552490234, "eval_runtime": 7.1684, "eval_samples_per_second": 126.807, "eval_steps_per_second": 15.903, "eval_uas": 82.52995826040123, "step": 9000 }, { "epoch": 47.89, "learning_rate": 3.1710067114093965e-05, "loss": 0.0252, "step": 9100 }, { "epoch": 48.42, "learning_rate": 3.1173154362416114e-05, "loss": 0.0219, "step": 9200 }, { "epoch": 48.95, "learning_rate": 3.0636241610738255e-05, "loss": 0.0253, "step": 9300 }, { "epoch": 49.47, "learning_rate": 3.0099328859060403e-05, "loss": 0.0233, "step": 9400 }, { "epoch": 50.0, "learning_rate": 2.956241610738255e-05, "loss": 0.0231, "step": 9500 }, { "epoch": 50.0, "eval_las": 76.79412952740002, "eval_loss": 3.195754051208496, "eval_runtime": 7.1619, "eval_samples_per_second": 126.921, "eval_steps_per_second": 15.917, "eval_uas": 82.52659216372695, "step": 9500 }, { "epoch": 50.53, "learning_rate": 2.90255033557047e-05, "loss": 0.0204, "step": 9600 }, { "epoch": 51.05, "learning_rate": 2.8488590604026848e-05, "loss": 0.0247, "step": 9700 }, { "epoch": 51.58, "learning_rate": 2.7951677852348996e-05, "loss": 0.0217, "step": 9800 }, { "epoch": 52.11, "learning_rate": 2.741476510067114e-05, "loss": 0.0217, "step": 9900 }, { "epoch": 52.63, "learning_rate": 2.687785234899329e-05, "loss": 0.021, "step": 10000 }, { "epoch": 52.63, "eval_las": 76.66621785377676, "eval_loss": 3.3272011280059814, "eval_runtime": 7.1591, "eval_samples_per_second": 126.971, "eval_steps_per_second": 15.924, "eval_uas": 82.59728019388717, "step": 10000 }, { "epoch": 53.16, "learning_rate": 2.6340939597315438e-05, "loss": 0.0197, "step": 10100 }, { "epoch": 53.68, "learning_rate": 2.5804026845637586e-05, "loss": 0.0229, "step": 10200 }, { "epoch": 54.21, "learning_rate": 2.5267114093959734e-05, "loss": 0.0196, "step": 10300 }, { "epoch": 54.74, "learning_rate": 2.4730201342281882e-05, "loss": 0.0211, "step": 10400 }, { "epoch": 55.26, "learning_rate": 2.4193288590604027e-05, "loss": 0.0198, "step": 10500 }, { "epoch": 55.26, "eval_las": 76.77056685067996, "eval_loss": 3.3302974700927734, "eval_runtime": 7.1625, "eval_samples_per_second": 126.911, "eval_steps_per_second": 15.916, "eval_uas": 82.58718190386428, "step": 10500 }, { "epoch": 55.79, "learning_rate": 2.3656375838926175e-05, "loss": 0.0207, "step": 10600 }, { "epoch": 56.32, "learning_rate": 2.3119463087248324e-05, "loss": 0.0184, "step": 10700 }, { "epoch": 56.84, "learning_rate": 2.2582550335570472e-05, "loss": 0.0197, "step": 10800 }, { "epoch": 57.37, "learning_rate": 2.204563758389262e-05, "loss": 0.0197, "step": 10900 }, { "epoch": 57.89, "learning_rate": 2.150872483221477e-05, "loss": 0.02, "step": 11000 }, { "epoch": 57.89, "eval_las": 76.88838023428033, "eval_loss": 3.3782100677490234, "eval_runtime": 7.1625, "eval_samples_per_second": 126.91, "eval_steps_per_second": 15.916, "eval_uas": 82.61411067725865, "step": 11000 }, { "epoch": 58.42, "learning_rate": 2.0971812080536913e-05, "loss": 0.0192, "step": 11100 }, { "epoch": 58.95, "learning_rate": 2.043489932885906e-05, "loss": 0.0182, "step": 11200 }, { "epoch": 59.47, "learning_rate": 1.989798657718121e-05, "loss": 0.0186, "step": 11300 }, { "epoch": 60.0, "learning_rate": 1.9361073825503358e-05, "loss": 0.0178, "step": 11400 }, { "epoch": 60.53, "learning_rate": 1.8824161073825503e-05, "loss": 0.0165, "step": 11500 }, { "epoch": 60.53, "eval_las": 77.07351555136664, "eval_loss": 3.4528448581695557, "eval_runtime": 7.1527, "eval_samples_per_second": 127.085, "eval_steps_per_second": 15.938, "eval_uas": 82.7622189309277, "step": 11500 }, { "epoch": 61.05, "learning_rate": 1.828724832214765e-05, "loss": 0.0195, "step": 11600 }, { "epoch": 61.58, "learning_rate": 1.77503355704698e-05, "loss": 0.019, "step": 11700 }, { "epoch": 62.11, "learning_rate": 1.7213422818791948e-05, "loss": 0.0157, "step": 11800 }, { "epoch": 62.63, "learning_rate": 1.6676510067114096e-05, "loss": 0.0174, "step": 11900 }, { "epoch": 63.16, "learning_rate": 1.6139597315436244e-05, "loss": 0.019, "step": 12000 }, { "epoch": 63.16, "eval_las": 76.99946142453211, "eval_loss": 3.499229669570923, "eval_runtime": 7.1524, "eval_samples_per_second": 127.09, "eval_steps_per_second": 15.939, "eval_uas": 82.73865625420763, "step": 12000 }, { "epoch": 63.68, "learning_rate": 1.560268456375839e-05, "loss": 0.0151, "step": 12100 }, { "epoch": 64.21, "learning_rate": 1.5065771812080539e-05, "loss": 0.0175, "step": 12200 }, { "epoch": 64.74, "learning_rate": 1.4528859060402685e-05, "loss": 0.0165, "step": 12300 }, { "epoch": 65.26, "learning_rate": 1.3991946308724834e-05, "loss": 0.0184, "step": 12400 }, { "epoch": 65.79, "learning_rate": 1.3455033557046982e-05, "loss": 0.0175, "step": 12500 }, { "epoch": 65.79, "eval_las": 77.08361384138952, "eval_loss": 3.48171067237854, "eval_runtime": 7.1671, "eval_samples_per_second": 126.829, "eval_steps_per_second": 15.906, "eval_uas": 82.72182577083615, "step": 12500 }, { "epoch": 66.32, "learning_rate": 1.2918120805369127e-05, "loss": 0.0173, "step": 12600 }, { "epoch": 66.84, "learning_rate": 1.2381208053691277e-05, "loss": 0.0157, "step": 12700 }, { "epoch": 67.37, "learning_rate": 1.1844295302013425e-05, "loss": 0.017, "step": 12800 }, { "epoch": 67.89, "learning_rate": 1.130738255033557e-05, "loss": 0.0155, "step": 12900 }, { "epoch": 68.42, "learning_rate": 1.0770469798657718e-05, "loss": 0.0162, "step": 13000 }, { "epoch": 68.42, "eval_las": 77.04322068129797, "eval_loss": 3.5367863178253174, "eval_runtime": 7.1597, "eval_samples_per_second": 126.961, "eval_steps_per_second": 15.923, "eval_uas": 82.68143261074458, "step": 13000 }, { "epoch": 68.95, "learning_rate": 1.0233557046979868e-05, "loss": 0.0163, "step": 13100 }, { "epoch": 69.47, "learning_rate": 9.696644295302015e-06, "loss": 0.018, "step": 13200 }, { "epoch": 70.0, "learning_rate": 9.159731543624161e-06, "loss": 0.0148, "step": 13300 }, { "epoch": 70.53, "learning_rate": 8.62281879194631e-06, "loss": 0.0146, "step": 13400 }, { "epoch": 71.05, "learning_rate": 8.085906040268458e-06, "loss": 0.0173, "step": 13500 }, { "epoch": 71.05, "eval_las": 77.18123064494412, "eval_loss": 3.553845167160034, "eval_runtime": 7.1565, "eval_samples_per_second": 127.017, "eval_steps_per_second": 15.93, "eval_uas": 82.84637134778511, "step": 13500 }, { "epoch": 71.58, "learning_rate": 7.548993288590605e-06, "loss": 0.0152, "step": 13600 }, { "epoch": 72.11, "learning_rate": 7.012080536912752e-06, "loss": 0.0169, "step": 13700 }, { "epoch": 72.63, "learning_rate": 6.475167785234899e-06, "loss": 0.0154, "step": 13800 }, { "epoch": 73.16, "learning_rate": 5.938255033557048e-06, "loss": 0.0155, "step": 13900 }, { "epoch": 73.68, "learning_rate": 5.401342281879195e-06, "loss": 0.017, "step": 14000 }, { "epoch": 73.68, "eval_las": 77.27884744849872, "eval_loss": 3.5654006004333496, "eval_runtime": 7.1577, "eval_samples_per_second": 126.996, "eval_steps_per_second": 15.927, "eval_uas": 82.85983573448229, "step": 14000 }, { "epoch": 74.21, "learning_rate": 4.864429530201343e-06, "loss": 0.0154, "step": 14100 }, { "epoch": 74.74, "learning_rate": 4.32751677852349e-06, "loss": 0.0161, "step": 14200 }, { "epoch": 75.26, "learning_rate": 3.790604026845638e-06, "loss": 0.0153, "step": 14300 }, { "epoch": 75.79, "learning_rate": 3.253691275167786e-06, "loss": 0.016, "step": 14400 }, { "epoch": 76.32, "learning_rate": 2.716778523489933e-06, "loss": 0.0154, "step": 14500 }, { "epoch": 76.32, "eval_las": 77.34616938198465, "eval_loss": 3.572803497314453, "eval_runtime": 7.1716, "eval_samples_per_second": 126.75, "eval_steps_per_second": 15.896, "eval_uas": 82.89349670122526, "step": 14500 }, { "epoch": 76.84, "learning_rate": 2.1798657718120807e-06, "loss": 0.0158, "step": 14600 }, { "epoch": 77.37, "learning_rate": 1.6429530201342283e-06, "loss": 0.0156, "step": 14700 }, { "epoch": 77.89, "learning_rate": 1.1060402684563759e-06, "loss": 0.0163, "step": 14800 }, { "epoch": 78.42, "learning_rate": 5.691275167785235e-07, "loss": 0.0151, "step": 14900 }, { "epoch": 78.95, "learning_rate": 3.2214765100671145e-08, "loss": 0.0149, "step": 15000 }, { "epoch": 78.95, "eval_las": 77.3865625420762, "eval_loss": 3.5840232372283936, "eval_runtime": 7.1573, "eval_samples_per_second": 127.003, "eval_steps_per_second": 15.928, "eval_uas": 82.93052376464252, "step": 15000 }, { "epoch": 78.95, "step": 15000, "total_flos": 8.008377828590592e+16, "train_loss": 0.12927290275096892, "train_runtime": 8965.3483, "train_samples_per_second": 53.539, "train_steps_per_second": 1.673 } ], "max_steps": 15000, "num_train_epochs": 79, "total_flos": 8.008377828590592e+16, "trial_name": null, "trial_params": null }