{ "best_metric": 74.99219127126105, "best_model_checkpoint": "/scratch/mrahma45/pixel/finetuned_models/bert/bert-base-finetuned-parsing-ud-Hindi-HDTB/checkpoint-15000", "epoch": 36.05769230769231, "global_step": 15000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.24, "learning_rate": 7.52e-05, "loss": 4.0483, "step": 100 }, { "epoch": 0.48, "learning_rate": 7.949530201342283e-05, "loss": 1.7391, "step": 200 }, { "epoch": 0.72, "learning_rate": 7.895838926174497e-05, "loss": 1.4589, "step": 300 }, { "epoch": 0.96, "learning_rate": 7.842147651006712e-05, "loss": 1.2986, "step": 400 }, { "epoch": 1.2, "learning_rate": 7.788456375838927e-05, "loss": 1.1463, "step": 500 }, { "epoch": 1.2, "eval_las": 68.06655876423319, "eval_loss": 1.2132718563079834, "eval_runtime": 11.8598, "eval_samples_per_second": 139.885, "eval_steps_per_second": 17.538, "eval_uas": 77.91691512621746, "step": 500 }, { "epoch": 1.44, "learning_rate": 7.734765100671142e-05, "loss": 1.1162, "step": 600 }, { "epoch": 1.68, "learning_rate": 7.681073825503357e-05, "loss": 1.0738, "step": 700 }, { "epoch": 1.92, "learning_rate": 7.627382550335572e-05, "loss": 1.0485, "step": 800 }, { "epoch": 2.16, "learning_rate": 7.573691275167786e-05, "loss": 0.9272, "step": 900 }, { "epoch": 2.4, "learning_rate": 7.52e-05, "loss": 0.901, "step": 1000 }, { "epoch": 2.4, "eval_las": 71.78635318170203, "eval_loss": 1.053443193435669, "eval_runtime": 11.8478, "eval_samples_per_second": 140.026, "eval_steps_per_second": 17.556, "eval_uas": 80.90410881108555, "step": 1000 }, { "epoch": 2.64, "learning_rate": 7.466308724832215e-05, "loss": 0.8841, "step": 1100 }, { "epoch": 2.88, "learning_rate": 7.41261744966443e-05, "loss": 0.8871, "step": 1200 }, { "epoch": 3.12, "learning_rate": 7.358926174496644e-05, "loss": 0.7905, "step": 1300 }, { "epoch": 3.37, "learning_rate": 7.305234899328859e-05, "loss": 0.7284, "step": 1400 }, { "epoch": 3.61, "learning_rate": 7.251543624161074e-05, "loss": 0.7392, "step": 1500 }, { "epoch": 3.61, "eval_las": 73.2231592696709, "eval_loss": 1.0388336181640625, "eval_runtime": 11.8527, "eval_samples_per_second": 139.968, "eval_steps_per_second": 17.549, "eval_uas": 81.9320214669052, "step": 1500 }, { "epoch": 3.85, "learning_rate": 7.197852348993289e-05, "loss": 0.7587, "step": 1600 }, { "epoch": 4.09, "learning_rate": 7.144161073825504e-05, "loss": 0.6997, "step": 1700 }, { "epoch": 4.33, "learning_rate": 7.090469798657718e-05, "loss": 0.5892, "step": 1800 }, { "epoch": 4.57, "learning_rate": 7.036778523489933e-05, "loss": 0.6111, "step": 1900 }, { "epoch": 4.81, "learning_rate": 6.983087248322148e-05, "loss": 0.6103, "step": 2000 }, { "epoch": 4.81, "eval_las": 73.0783428457847, "eval_loss": 1.0303574800491333, "eval_runtime": 11.8592, "eval_samples_per_second": 139.892, "eval_steps_per_second": 17.539, "eval_uas": 81.92350285373541, "step": 2000 }, { "epoch": 5.05, "learning_rate": 6.929395973154363e-05, "loss": 0.6054, "step": 2100 }, { "epoch": 5.29, "learning_rate": 6.875704697986578e-05, "loss": 0.4648, "step": 2200 }, { "epoch": 5.53, "learning_rate": 6.822013422818793e-05, "loss": 0.4894, "step": 2300 }, { "epoch": 5.77, "learning_rate": 6.768322147651007e-05, "loss": 0.5087, "step": 2400 }, { "epoch": 6.01, "learning_rate": 6.714630872483222e-05, "loss": 0.5131, "step": 2500 }, { "epoch": 6.01, "eval_las": 73.69452253173183, "eval_loss": 1.1480714082717896, "eval_runtime": 11.8558, "eval_samples_per_second": 139.932, "eval_steps_per_second": 17.544, "eval_uas": 82.44881733253825, "step": 2500 }, { "epoch": 6.25, "learning_rate": 6.660939597315437e-05, "loss": 0.3686, "step": 2600 }, { "epoch": 6.49, "learning_rate": 6.607248322147652e-05, "loss": 0.381, "step": 2700 }, { "epoch": 6.73, "learning_rate": 6.553557046979867e-05, "loss": 0.408, "step": 2800 }, { "epoch": 6.97, "learning_rate": 6.499865771812081e-05, "loss": 0.4176, "step": 2900 }, { "epoch": 7.21, "learning_rate": 6.446174496644296e-05, "loss": 0.3048, "step": 3000 }, { "epoch": 7.21, "eval_las": 73.29698725047562, "eval_loss": 1.3656591176986694, "eval_runtime": 11.8603, "eval_samples_per_second": 139.878, "eval_steps_per_second": 17.537, "eval_uas": 82.40338472896612, "step": 3000 }, { "epoch": 7.45, "learning_rate": 6.392483221476511e-05, "loss": 0.2996, "step": 3100 }, { "epoch": 7.69, "learning_rate": 6.338791946308726e-05, "loss": 0.3191, "step": 3200 }, { "epoch": 7.93, "learning_rate": 6.28510067114094e-05, "loss": 0.3222, "step": 3300 }, { "epoch": 8.17, "learning_rate": 6.231409395973154e-05, "loss": 0.2624, "step": 3400 }, { "epoch": 8.41, "learning_rate": 6.177718120805369e-05, "loss": 0.2485, "step": 3500 }, { "epoch": 8.41, "eval_las": 73.33106170315473, "eval_loss": 1.4516910314559937, "eval_runtime": 11.8613, "eval_samples_per_second": 139.867, "eval_steps_per_second": 17.536, "eval_uas": 82.43461964392196, "step": 3500 }, { "epoch": 8.65, "learning_rate": 6.124026845637584e-05, "loss": 0.247, "step": 3600 }, { "epoch": 8.89, "learning_rate": 6.070335570469799e-05, "loss": 0.269, "step": 3700 }, { "epoch": 9.13, "learning_rate": 6.0166442953020136e-05, "loss": 0.2213, "step": 3800 }, { "epoch": 9.38, "learning_rate": 5.962953020134229e-05, "loss": 0.1917, "step": 3900 }, { "epoch": 9.62, "learning_rate": 5.909261744966444e-05, "loss": 0.2013, "step": 4000 }, { "epoch": 9.62, "eval_las": 73.58945963597127, "eval_loss": 1.5729166269302368, "eval_runtime": 11.8551, "eval_samples_per_second": 139.94, "eval_steps_per_second": 17.545, "eval_uas": 82.28980322003578, "step": 4000 }, { "epoch": 9.86, "learning_rate": 5.855570469798659e-05, "loss": 0.2064, "step": 4100 }, { "epoch": 10.1, "learning_rate": 5.8018791946308735e-05, "loss": 0.1845, "step": 4200 }, { "epoch": 10.34, "learning_rate": 5.7481879194630884e-05, "loss": 0.1576, "step": 4300 }, { "epoch": 10.58, "learning_rate": 5.694496644295303e-05, "loss": 0.1647, "step": 4400 }, { "epoch": 10.82, "learning_rate": 5.6408053691275166e-05, "loss": 0.17, "step": 4500 }, { "epoch": 10.82, "eval_las": 73.72859698441094, "eval_loss": 1.6659384965896606, "eval_runtime": 11.8486, "eval_samples_per_second": 140.016, "eval_steps_per_second": 17.555, "eval_uas": 82.71289434080131, "step": 4500 }, { "epoch": 11.06, "learning_rate": 5.5871140939597315e-05, "loss": 0.1642, "step": 4600 }, { "epoch": 11.3, "learning_rate": 5.533422818791946e-05, "loss": 0.1259, "step": 4700 }, { "epoch": 11.54, "learning_rate": 5.479731543624161e-05, "loss": 0.1365, "step": 4800 }, { "epoch": 11.78, "learning_rate": 5.426040268456376e-05, "loss": 0.1398, "step": 4900 }, { "epoch": 12.02, "learning_rate": 5.372348993288591e-05, "loss": 0.1447, "step": 5000 }, { "epoch": 12.02, "eval_las": 74.01539029446006, "eval_loss": 1.8783390522003174, "eval_runtime": 11.8594, "eval_samples_per_second": 139.889, "eval_steps_per_second": 17.539, "eval_uas": 82.87474799102706, "step": 5000 }, { "epoch": 12.26, "learning_rate": 5.3186577181208056e-05, "loss": 0.1091, "step": 5100 }, { "epoch": 12.5, "learning_rate": 5.2649664429530204e-05, "loss": 0.1146, "step": 5200 }, { "epoch": 12.74, "learning_rate": 5.211275167785235e-05, "loss": 0.1186, "step": 5300 }, { "epoch": 12.98, "learning_rate": 5.15758389261745e-05, "loss": 0.1171, "step": 5400 }, { "epoch": 13.22, "learning_rate": 5.1038926174496656e-05, "loss": 0.0908, "step": 5500 }, { "epoch": 13.22, "eval_las": 73.81094357838543, "eval_loss": 2.0186927318573, "eval_runtime": 11.8461, "eval_samples_per_second": 140.046, "eval_steps_per_second": 17.558, "eval_uas": 82.64758497316636, "step": 5500 }, { "epoch": 13.46, "learning_rate": 5.050201342281879e-05, "loss": 0.0969, "step": 5600 }, { "epoch": 13.7, "learning_rate": 4.996510067114094e-05, "loss": 0.099, "step": 5700 }, { "epoch": 13.94, "learning_rate": 4.942818791946309e-05, "loss": 0.0998, "step": 5800 }, { "epoch": 14.18, "learning_rate": 4.8891275167785235e-05, "loss": 0.078, "step": 5900 }, { "epoch": 14.42, "learning_rate": 4.835436241610738e-05, "loss": 0.0827, "step": 6000 }, { "epoch": 14.42, "eval_las": 74.11761365249737, "eval_loss": 2.0184013843536377, "eval_runtime": 11.8521, "eval_samples_per_second": 139.975, "eval_steps_per_second": 17.55, "eval_uas": 82.9343782832155, "step": 6000 }, { "epoch": 14.66, "learning_rate": 4.781744966442953e-05, "loss": 0.084, "step": 6100 }, { "epoch": 14.9, "learning_rate": 4.728053691275168e-05, "loss": 0.0891, "step": 6200 }, { "epoch": 15.14, "learning_rate": 4.674362416107383e-05, "loss": 0.076, "step": 6300 }, { "epoch": 15.38, "learning_rate": 4.6206711409395976e-05, "loss": 0.0747, "step": 6400 }, { "epoch": 15.62, "learning_rate": 4.5669798657718125e-05, "loss": 0.0738, "step": 6500 }, { "epoch": 15.62, "eval_las": 74.12045319022063, "eval_loss": 2.130760431289673, "eval_runtime": 11.8527, "eval_samples_per_second": 139.968, "eval_steps_per_second": 17.549, "eval_uas": 82.88042706647359, "step": 6500 }, { "epoch": 15.87, "learning_rate": 4.513288590604027e-05, "loss": 0.0725, "step": 6600 }, { "epoch": 16.11, "learning_rate": 4.459597315436242e-05, "loss": 0.0669, "step": 6700 }, { "epoch": 16.35, "learning_rate": 4.405906040268456e-05, "loss": 0.0632, "step": 6800 }, { "epoch": 16.59, "learning_rate": 4.352214765100671e-05, "loss": 0.0649, "step": 6900 }, { "epoch": 16.83, "learning_rate": 4.298523489932886e-05, "loss": 0.0667, "step": 7000 }, { "epoch": 16.83, "eval_las": 74.06650197347872, "eval_loss": 2.201524019241333, "eval_runtime": 11.8493, "eval_samples_per_second": 140.008, "eval_steps_per_second": 17.554, "eval_uas": 82.75548740665019, "step": 7000 }, { "epoch": 17.07, "learning_rate": 4.244832214765101e-05, "loss": 0.0607, "step": 7100 }, { "epoch": 17.31, "learning_rate": 4.1911409395973156e-05, "loss": 0.0532, "step": 7200 }, { "epoch": 17.55, "learning_rate": 4.1374496644295304e-05, "loss": 0.0544, "step": 7300 }, { "epoch": 17.79, "learning_rate": 4.083758389261745e-05, "loss": 0.0558, "step": 7400 }, { "epoch": 18.03, "learning_rate": 4.03006711409396e-05, "loss": 0.0537, "step": 7500 }, { "epoch": 18.03, "eval_las": 74.13465087883692, "eval_loss": 2.413492202758789, "eval_runtime": 11.8547, "eval_samples_per_second": 139.944, "eval_steps_per_second": 17.546, "eval_uas": 82.8719084533038, "step": 7500 }, { "epoch": 18.27, "learning_rate": 3.976375838926175e-05, "loss": 0.0462, "step": 7600 }, { "epoch": 18.51, "learning_rate": 3.92268456375839e-05, "loss": 0.0484, "step": 7700 }, { "epoch": 18.75, "learning_rate": 3.8689932885906045e-05, "loss": 0.0477, "step": 7800 }, { "epoch": 18.99, "learning_rate": 3.815302013422819e-05, "loss": 0.0527, "step": 7900 }, { "epoch": 19.23, "learning_rate": 3.761610738255034e-05, "loss": 0.0394, "step": 8000 }, { "epoch": 19.23, "eval_las": 74.10625550160434, "eval_loss": 2.4247446060180664, "eval_runtime": 11.8548, "eval_samples_per_second": 139.943, "eval_steps_per_second": 17.546, "eval_uas": 82.85203168924099, "step": 8000 }, { "epoch": 19.47, "learning_rate": 3.707919463087249e-05, "loss": 0.0407, "step": 8100 }, { "epoch": 19.71, "learning_rate": 3.654228187919463e-05, "loss": 0.045, "step": 8200 }, { "epoch": 19.95, "learning_rate": 3.600536912751678e-05, "loss": 0.044, "step": 8300 }, { "epoch": 20.19, "learning_rate": 3.546845637583893e-05, "loss": 0.0336, "step": 8400 }, { "epoch": 20.43, "learning_rate": 3.4931543624161076e-05, "loss": 0.0358, "step": 8500 }, { "epoch": 20.43, "eval_las": 74.19996024647187, "eval_loss": 2.439199209213257, "eval_runtime": 11.8496, "eval_samples_per_second": 140.005, "eval_steps_per_second": 17.553, "eval_uas": 83.01672487719, "step": 8500 }, { "epoch": 20.67, "learning_rate": 3.4394630872483224e-05, "loss": 0.0352, "step": 8600 }, { "epoch": 20.91, "learning_rate": 3.385771812080537e-05, "loss": 0.038, "step": 8700 }, { "epoch": 21.15, "learning_rate": 3.332080536912752e-05, "loss": 0.0314, "step": 8800 }, { "epoch": 21.39, "learning_rate": 3.278389261744967e-05, "loss": 0.0323, "step": 8900 }, { "epoch": 21.63, "learning_rate": 3.224697986577182e-05, "loss": 0.0293, "step": 9000 }, { "epoch": 21.63, "eval_las": 74.28230684044638, "eval_loss": 2.6793813705444336, "eval_runtime": 11.8495, "eval_samples_per_second": 140.006, "eval_steps_per_second": 17.553, "eval_uas": 82.96277366044808, "step": 9000 }, { "epoch": 21.88, "learning_rate": 3.1710067114093965e-05, "loss": 0.0332, "step": 9100 }, { "epoch": 22.12, "learning_rate": 3.1173154362416114e-05, "loss": 0.0302, "step": 9200 }, { "epoch": 22.36, "learning_rate": 3.0636241610738255e-05, "loss": 0.0268, "step": 9300 }, { "epoch": 22.6, "learning_rate": 3.0099328859060403e-05, "loss": 0.0273, "step": 9400 }, { "epoch": 22.84, "learning_rate": 2.956241610738255e-05, "loss": 0.0275, "step": 9500 }, { "epoch": 22.84, "eval_las": 74.25391146321378, "eval_loss": 2.634089231491089, "eval_runtime": 11.8524, "eval_samples_per_second": 139.972, "eval_steps_per_second": 17.549, "eval_uas": 82.99400857540392, "step": 9500 }, { "epoch": 23.08, "learning_rate": 2.90255033557047e-05, "loss": 0.0277, "step": 9600 }, { "epoch": 23.32, "learning_rate": 2.8488590604026848e-05, "loss": 0.0245, "step": 9700 }, { "epoch": 23.56, "learning_rate": 2.7951677852348996e-05, "loss": 0.0228, "step": 9800 }, { "epoch": 23.8, "learning_rate": 2.741476510067114e-05, "loss": 0.0242, "step": 9900 }, { "epoch": 24.04, "learning_rate": 2.687785234899329e-05, "loss": 0.0235, "step": 10000 }, { "epoch": 24.04, "eval_las": 74.29366499133941, "eval_loss": 2.7188923358917236, "eval_runtime": 11.8499, "eval_samples_per_second": 140.001, "eval_steps_per_second": 17.553, "eval_uas": 83.09339239571798, "step": 10000 }, { "epoch": 24.28, "learning_rate": 2.6340939597315438e-05, "loss": 0.0207, "step": 10100 }, { "epoch": 24.52, "learning_rate": 2.5804026845637586e-05, "loss": 0.0204, "step": 10200 }, { "epoch": 24.76, "learning_rate": 2.5267114093959734e-05, "loss": 0.0239, "step": 10300 }, { "epoch": 25.0, "learning_rate": 2.4730201342281882e-05, "loss": 0.0218, "step": 10400 }, { "epoch": 25.24, "learning_rate": 2.4193288590604027e-05, "loss": 0.0167, "step": 10500 }, { "epoch": 25.24, "eval_las": 74.49243263196752, "eval_loss": 2.821323871612549, "eval_runtime": 11.852, "eval_samples_per_second": 139.976, "eval_steps_per_second": 17.55, "eval_uas": 83.17005991424597, "step": 10500 }, { "epoch": 25.48, "learning_rate": 2.3656375838926175e-05, "loss": 0.0189, "step": 10600 }, { "epoch": 25.72, "learning_rate": 2.3119463087248324e-05, "loss": 0.0189, "step": 10700 }, { "epoch": 25.96, "learning_rate": 2.2582550335570472e-05, "loss": 0.0202, "step": 10800 }, { "epoch": 26.2, "learning_rate": 2.204563758389262e-05, "loss": 0.0161, "step": 10900 }, { "epoch": 26.44, "learning_rate": 2.150872483221477e-05, "loss": 0.0157, "step": 11000 }, { "epoch": 26.44, "eval_las": 74.57193968821876, "eval_loss": 2.849531650543213, "eval_runtime": 11.8513, "eval_samples_per_second": 139.985, "eval_steps_per_second": 17.551, "eval_uas": 83.22117159326461, "step": 11000 }, { "epoch": 26.68, "learning_rate": 2.0971812080536913e-05, "loss": 0.0166, "step": 11100 }, { "epoch": 26.92, "learning_rate": 2.043489932885906e-05, "loss": 0.0156, "step": 11200 }, { "epoch": 27.16, "learning_rate": 1.989798657718121e-05, "loss": 0.0143, "step": 11300 }, { "epoch": 27.4, "learning_rate": 1.9361073825503358e-05, "loss": 0.0141, "step": 11400 }, { "epoch": 27.64, "learning_rate": 1.8824161073825503e-05, "loss": 0.014, "step": 11500 }, { "epoch": 27.64, "eval_las": 74.45267910384189, "eval_loss": 2.8970541954040527, "eval_runtime": 11.8508, "eval_samples_per_second": 139.991, "eval_steps_per_second": 17.552, "eval_uas": 83.05931794303886, "step": 11500 }, { "epoch": 27.88, "learning_rate": 1.828724832214765e-05, "loss": 0.0135, "step": 11600 }, { "epoch": 28.12, "learning_rate": 1.77503355704698e-05, "loss": 0.0124, "step": 11700 }, { "epoch": 28.37, "learning_rate": 1.7213422818791948e-05, "loss": 0.0112, "step": 11800 }, { "epoch": 28.61, "learning_rate": 1.6676510067114096e-05, "loss": 0.0115, "step": 11900 }, { "epoch": 28.85, "learning_rate": 1.6139597315436244e-05, "loss": 0.0104, "step": 12000 }, { "epoch": 28.85, "eval_las": 74.57761876366527, "eval_loss": 2.9912426471710205, "eval_runtime": 11.851, "eval_samples_per_second": 139.988, "eval_steps_per_second": 17.551, "eval_uas": 83.14734361245989, "step": 12000 }, { "epoch": 29.09, "learning_rate": 1.560268456375839e-05, "loss": 0.0106, "step": 12100 }, { "epoch": 29.33, "learning_rate": 1.5071140939597317e-05, "loss": 0.0101, "step": 12200 }, { "epoch": 29.57, "learning_rate": 1.4534228187919464e-05, "loss": 0.0096, "step": 12300 }, { "epoch": 29.81, "learning_rate": 1.3997315436241612e-05, "loss": 0.0093, "step": 12400 }, { "epoch": 30.05, "learning_rate": 1.346040268456376e-05, "loss": 0.0093, "step": 12500 }, { "epoch": 30.05, "eval_las": 74.54922338643269, "eval_loss": 3.005786418914795, "eval_runtime": 11.8568, "eval_samples_per_second": 139.92, "eval_steps_per_second": 17.543, "eval_uas": 83.15586222562966, "step": 12500 }, { "epoch": 30.29, "learning_rate": 1.2923489932885907e-05, "loss": 0.0084, "step": 12600 }, { "epoch": 30.53, "learning_rate": 1.2386577181208055e-05, "loss": 0.0088, "step": 12700 }, { "epoch": 30.77, "learning_rate": 1.1849664429530204e-05, "loss": 0.0075, "step": 12800 }, { "epoch": 31.01, "learning_rate": 1.131275167785235e-05, "loss": 0.0094, "step": 12900 }, { "epoch": 31.25, "learning_rate": 1.0775838926174498e-05, "loss": 0.0071, "step": 13000 }, { "epoch": 31.25, "eval_las": 74.84453530965159, "eval_loss": 3.0800094604492188, "eval_runtime": 11.8601, "eval_samples_per_second": 139.881, "eval_steps_per_second": 17.538, "eval_uas": 83.40574154527643, "step": 13000 }, { "epoch": 31.49, "learning_rate": 1.0238926174496647e-05, "loss": 0.0065, "step": 13100 }, { "epoch": 31.73, "learning_rate": 9.702013422818793e-06, "loss": 0.0067, "step": 13200 }, { "epoch": 31.97, "learning_rate": 9.165100671140941e-06, "loss": 0.0069, "step": 13300 }, { "epoch": 32.21, "learning_rate": 8.628187919463088e-06, "loss": 0.0056, "step": 13400 }, { "epoch": 32.45, "learning_rate": 8.091275167785234e-06, "loss": 0.0052, "step": 13500 }, { "epoch": 32.45, "eval_las": 74.84453530965159, "eval_loss": 3.167156219482422, "eval_runtime": 11.8563, "eval_samples_per_second": 139.926, "eval_steps_per_second": 17.543, "eval_uas": 83.41993923389272, "step": 13500 }, { "epoch": 32.69, "learning_rate": 7.5543624161073835e-06, "loss": 0.0057, "step": 13600 }, { "epoch": 32.93, "learning_rate": 7.017449664429531e-06, "loss": 0.0057, "step": 13700 }, { "epoch": 33.17, "learning_rate": 6.480536912751678e-06, "loss": 0.0054, "step": 13800 }, { "epoch": 33.41, "learning_rate": 5.943624161073826e-06, "loss": 0.0048, "step": 13900 }, { "epoch": 33.65, "learning_rate": 5.406711409395974e-06, "loss": 0.0054, "step": 14000 }, { "epoch": 33.65, "eval_las": 74.90132606411676, "eval_loss": 3.178938627243042, "eval_runtime": 11.8514, "eval_samples_per_second": 139.983, "eval_steps_per_second": 17.551, "eval_uas": 83.49376721469744, "step": 14000 }, { "epoch": 33.89, "learning_rate": 4.869798657718121e-06, "loss": 0.0049, "step": 14100 }, { "epoch": 34.13, "learning_rate": 4.332885906040269e-06, "loss": 0.0039, "step": 14200 }, { "epoch": 34.38, "learning_rate": 3.795973154362416e-06, "loss": 0.005, "step": 14300 }, { "epoch": 34.62, "learning_rate": 3.259060402684564e-06, "loss": 0.004, "step": 14400 }, { "epoch": 34.86, "learning_rate": 2.722147651006712e-06, "loss": 0.0042, "step": 14500 }, { "epoch": 34.86, "eval_las": 74.9325609790726, "eval_loss": 3.2010536193847656, "eval_runtime": 11.8566, "eval_samples_per_second": 139.922, "eval_steps_per_second": 17.543, "eval_uas": 83.53636028054633, "step": 14500 }, { "epoch": 35.1, "learning_rate": 2.185234899328859e-06, "loss": 0.0038, "step": 14600 }, { "epoch": 35.34, "learning_rate": 1.648322147651007e-06, "loss": 0.0045, "step": 14700 }, { "epoch": 35.58, "learning_rate": 1.1114093959731544e-06, "loss": 0.0036, "step": 14800 }, { "epoch": 35.82, "learning_rate": 5.74496644295302e-07, "loss": 0.0042, "step": 14900 }, { "epoch": 36.06, "learning_rate": 3.758389261744967e-08, "loss": 0.0037, "step": 15000 }, { "epoch": 36.06, "eval_las": 74.99219127126105, "eval_loss": 3.1982181072235107, "eval_runtime": 11.8515, "eval_samples_per_second": 139.982, "eval_steps_per_second": 17.55, "eval_uas": 83.58747195956498, "step": 15000 }, { "epoch": 36.06, "step": 15000, "total_flos": 8.010080992700006e+16, "train_loss": 0.2272949548403422, "train_runtime": 8371.5337, "train_samples_per_second": 57.337, "train_steps_per_second": 1.792 } ], "max_steps": 15000, "num_train_epochs": 37, "total_flos": 8.010080992700006e+16, "trial_name": null, "trial_params": null }