{ "best_metric": 29.294196353905335, "best_model_checkpoint": "/scratch/mrahma45/pixel/finetuned_models/bert/bert-base-finetuned-parsing-ud-Korean-GSD/checkpoint-1500", "epoch": 28.985507246376812, "global_step": 4000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.72, "learning_rate": 7.52e-05, "loss": 4.639, "step": 100 }, { "epoch": 1.45, "learning_rate": 7.949530201342283e-05, "loss": 3.2377, "step": 200 }, { "epoch": 2.17, "learning_rate": 7.895838926174497e-05, "loss": 3.0918, "step": 300 }, { "epoch": 2.9, "learning_rate": 7.842147651006712e-05, "loss": 2.9885, "step": 400 }, { "epoch": 3.62, "learning_rate": 7.788456375838927e-05, "loss": 2.9165, "step": 500 }, { "epoch": 3.62, "eval_las": 27.86419133634387, "eval_loss": 2.9203531742095947, "eval_runtime": 6.1866, "eval_samples_per_second": 153.558, "eval_steps_per_second": 19.235, "eval_uas": 44.94062552266265, "step": 500 }, { "epoch": 4.35, "learning_rate": 7.734765100671142e-05, "loss": 2.8823, "step": 600 }, { "epoch": 5.07, "learning_rate": 7.681073825503357e-05, "loss": 2.8323, "step": 700 }, { "epoch": 5.8, "learning_rate": 7.627382550335572e-05, "loss": 2.7932, "step": 800 }, { "epoch": 6.52, "learning_rate": 7.573691275167786e-05, "loss": 2.753, "step": 900 }, { "epoch": 7.25, "learning_rate": 7.52e-05, "loss": 2.7026, "step": 1000 }, { "epoch": 7.25, "eval_las": 29.118581702625857, "eval_loss": 2.91562819480896, "eval_runtime": 6.1737, "eval_samples_per_second": 153.878, "eval_steps_per_second": 19.275, "eval_uas": 46.52115738417796, "step": 1000 }, { "epoch": 7.97, "learning_rate": 7.466308724832215e-05, "loss": 2.6742, "step": 1100 }, { "epoch": 8.7, "learning_rate": 7.41261744966443e-05, "loss": 2.5757, "step": 1200 }, { "epoch": 9.42, "learning_rate": 7.358926174496644e-05, "loss": 2.5635, "step": 1300 }, { "epoch": 10.14, "learning_rate": 7.305234899328859e-05, "loss": 2.5617, "step": 1400 }, { "epoch": 10.87, "learning_rate": 7.251543624161074e-05, "loss": 2.4701, "step": 1500 }, { "epoch": 10.87, "eval_las": 29.294196353905335, "eval_loss": 3.0349910259246826, "eval_runtime": 6.1916, "eval_samples_per_second": 153.433, "eval_steps_per_second": 19.219, "eval_uas": 47.00618832580699, "step": 1500 }, { "epoch": 11.59, "learning_rate": 7.197852348993289e-05, "loss": 2.3668, "step": 1600 }, { "epoch": 12.32, "learning_rate": 7.144161073825504e-05, "loss": 2.3771, "step": 1700 }, { "epoch": 13.04, "learning_rate": 7.090469798657718e-05, "loss": 2.3208, "step": 1800 }, { "epoch": 13.77, "learning_rate": 7.036778523489933e-05, "loss": 2.2402, "step": 1900 }, { "epoch": 14.49, "learning_rate": 6.983087248322148e-05, "loss": 2.1529, "step": 2000 }, { "epoch": 14.49, "eval_las": 29.0600434855327, "eval_loss": 3.292515277862549, "eval_runtime": 6.1874, "eval_samples_per_second": 153.538, "eval_steps_per_second": 19.233, "eval_uas": 47.09817695266767, "step": 2000 }, { "epoch": 15.22, "learning_rate": 6.929395973154363e-05, "loss": 2.1538, "step": 2100 }, { "epoch": 15.94, "learning_rate": 6.875704697986578e-05, "loss": 2.0934, "step": 2200 }, { "epoch": 16.67, "learning_rate": 6.822013422818793e-05, "loss": 1.9925, "step": 2300 }, { "epoch": 17.39, "learning_rate": 6.768322147651007e-05, "loss": 1.9471, "step": 2400 }, { "epoch": 18.12, "learning_rate": 6.714630872483222e-05, "loss": 1.9399, "step": 2500 }, { "epoch": 18.12, "eval_las": 28.85934102692758, "eval_loss": 3.839879274368286, "eval_runtime": 6.1995, "eval_samples_per_second": 153.237, "eval_steps_per_second": 19.195, "eval_uas": 46.28700451580532, "step": 2500 }, { "epoch": 18.84, "learning_rate": 6.660939597315437e-05, "loss": 1.8651, "step": 2600 }, { "epoch": 19.57, "learning_rate": 6.607248322147652e-05, "loss": 1.7929, "step": 2700 }, { "epoch": 20.29, "learning_rate": 6.553557046979867e-05, "loss": 1.7378, "step": 2800 }, { "epoch": 21.01, "learning_rate": 6.499865771812081e-05, "loss": 1.7862, "step": 2900 }, { "epoch": 21.74, "learning_rate": 6.446174496644296e-05, "loss": 1.6585, "step": 3000 }, { "epoch": 21.74, "eval_las": 28.349222277972906, "eval_loss": 4.253769397735596, "eval_runtime": 6.1742, "eval_samples_per_second": 153.865, "eval_steps_per_second": 19.274, "eval_uas": 45.63472152533868, "step": 3000 }, { "epoch": 22.46, "learning_rate": 6.392483221476511e-05, "loss": 1.6568, "step": 3100 }, { "epoch": 23.19, "learning_rate": 6.338791946308726e-05, "loss": 1.625, "step": 3200 }, { "epoch": 23.91, "learning_rate": 6.28510067114094e-05, "loss": 1.628, "step": 3300 }, { "epoch": 24.64, "learning_rate": 6.231409395973154e-05, "loss": 1.5299, "step": 3400 }, { "epoch": 25.36, "learning_rate": 6.177718120805369e-05, "loss": 1.5284, "step": 3500 }, { "epoch": 25.36, "eval_las": 28.6001003512293, "eval_loss": 4.6269941329956055, "eval_runtime": 6.1796, "eval_samples_per_second": 153.733, "eval_steps_per_second": 19.257, "eval_uas": 45.37548084964041, "step": 3500 }, { "epoch": 26.09, "learning_rate": 6.124026845637584e-05, "loss": 1.5192, "step": 3600 }, { "epoch": 26.81, "learning_rate": 6.070335570469799e-05, "loss": 1.4848, "step": 3700 }, { "epoch": 27.54, "learning_rate": 6.0166442953020136e-05, "loss": 1.4342, "step": 3800 }, { "epoch": 28.26, "learning_rate": 5.962953020134229e-05, "loss": 1.418, "step": 3900 }, { "epoch": 28.99, "learning_rate": 5.909261744966444e-05, "loss": 1.4311, "step": 4000 }, { "epoch": 28.99, "eval_las": 28.299046663321626, "eval_loss": 4.774014472961426, "eval_runtime": 6.1783, "eval_samples_per_second": 153.765, "eval_steps_per_second": 19.261, "eval_uas": 45.16641578859341, "step": 4000 }, { "epoch": 28.99, "step": 4000, "total_flos": 2.1298234165100544e+16, "train_loss": 2.234064235687256, "train_runtime": 2087.3588, "train_samples_per_second": 229.956, "train_steps_per_second": 7.186 } ], "max_steps": 15000, "num_train_epochs": 109, "total_flos": 2.1298234165100544e+16, "trial_name": null, "trial_params": null }