{ "best_metric": 18.92319873317498, "best_model_checkpoint": "/scratch/mrahma45/pixel/finetuned_models/bert/bert-base-finetuned-parsing-ud-Tamil-TTB/checkpoint-500", "epoch": 230.76923076923077, "global_step": 3000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 7.69, "learning_rate": 7.52e-05, "loss": 4.575, "step": 100 }, { "epoch": 15.38, "learning_rate": 7.949530201342283e-05, "loss": 2.7427, "step": 200 }, { "epoch": 23.08, "learning_rate": 7.895838926174497e-05, "loss": 1.7932, "step": 300 }, { "epoch": 30.77, "learning_rate": 7.842147651006712e-05, "loss": 1.1589, "step": 400 }, { "epoch": 38.46, "learning_rate": 7.788456375838927e-05, "loss": 0.787, "step": 500 }, { "epoch": 38.46, "eval_las": 18.92319873317498, "eval_loss": 8.03709888458252, "eval_runtime": 0.5456, "eval_samples_per_second": 146.634, "eval_steps_per_second": 18.329, "eval_uas": 36.18368962787015, "step": 500 }, { "epoch": 46.15, "learning_rate": 7.734765100671142e-05, "loss": 0.5851, "step": 600 }, { "epoch": 53.85, "learning_rate": 7.681073825503357e-05, "loss": 0.4576, "step": 700 }, { "epoch": 61.54, "learning_rate": 7.627382550335572e-05, "loss": 0.3946, "step": 800 }, { "epoch": 69.23, "learning_rate": 7.573691275167786e-05, "loss": 0.3746, "step": 900 }, { "epoch": 76.92, "learning_rate": 7.52e-05, "loss": 0.358, "step": 1000 }, { "epoch": 76.92, "eval_las": 16.943784639746635, "eval_loss": 11.071226119995117, "eval_runtime": 0.5411, "eval_samples_per_second": 147.855, "eval_steps_per_second": 18.482, "eval_uas": 33.966745843230406, "step": 1000 }, { "epoch": 84.62, "learning_rate": 7.466308724832215e-05, "loss": 0.3352, "step": 1100 }, { "epoch": 92.31, "learning_rate": 7.41261744966443e-05, "loss": 0.3213, "step": 1200 }, { "epoch": 100.0, "learning_rate": 7.358926174496644e-05, "loss": 0.322, "step": 1300 }, { "epoch": 107.69, "learning_rate": 7.305234899328859e-05, "loss": 0.3044, "step": 1400 }, { "epoch": 115.38, "learning_rate": 7.251543624161074e-05, "loss": 0.2942, "step": 1500 }, { "epoch": 115.38, "eval_las": 17.02296120348377, "eval_loss": 12.35420036315918, "eval_runtime": 0.542, "eval_samples_per_second": 147.589, "eval_steps_per_second": 18.449, "eval_uas": 34.837688044338876, "step": 1500 }, { "epoch": 123.08, "learning_rate": 7.197852348993289e-05, "loss": 0.3028, "step": 1600 }, { "epoch": 130.77, "learning_rate": 7.144161073825504e-05, "loss": 0.2954, "step": 1700 }, { "epoch": 138.46, "learning_rate": 7.090469798657718e-05, "loss": 0.2822, "step": 1800 }, { "epoch": 146.15, "learning_rate": 7.036778523489933e-05, "loss": 0.2814, "step": 1900 }, { "epoch": 153.85, "learning_rate": 6.983087248322148e-05, "loss": 0.2812, "step": 2000 }, { "epoch": 153.85, "eval_las": 17.81472684085511, "eval_loss": 13.284212112426758, "eval_runtime": 0.5416, "eval_samples_per_second": 147.703, "eval_steps_per_second": 18.463, "eval_uas": 36.57957244655582, "step": 2000 }, { "epoch": 161.54, "learning_rate": 6.929395973154363e-05, "loss": 0.2837, "step": 2100 }, { "epoch": 169.23, "learning_rate": 6.875704697986578e-05, "loss": 0.2887, "step": 2200 }, { "epoch": 176.92, "learning_rate": 6.822013422818793e-05, "loss": 0.278, "step": 2300 }, { "epoch": 184.62, "learning_rate": 6.768322147651007e-05, "loss": 0.2721, "step": 2400 }, { "epoch": 192.31, "learning_rate": 6.714630872483222e-05, "loss": 0.2727, "step": 2500 }, { "epoch": 192.31, "eval_las": 16.706254948535236, "eval_loss": 14.351984977722168, "eval_runtime": 0.5408, "eval_samples_per_second": 147.943, "eval_steps_per_second": 18.493, "eval_uas": 34.36262866191607, "step": 2500 }, { "epoch": 200.0, "learning_rate": 6.660939597315437e-05, "loss": 0.2718, "step": 2600 }, { "epoch": 207.69, "learning_rate": 6.607248322147652e-05, "loss": 0.264, "step": 2700 }, { "epoch": 215.38, "learning_rate": 6.553557046979867e-05, "loss": 0.2648, "step": 2800 }, { "epoch": 223.08, "learning_rate": 6.499865771812081e-05, "loss": 0.2699, "step": 2900 }, { "epoch": 230.77, "learning_rate": 6.446174496644296e-05, "loss": 0.2604, "step": 3000 }, { "epoch": 230.77, "eval_las": 17.339667458432302, "eval_loss": 14.176854133605957, "eval_runtime": 0.5399, "eval_samples_per_second": 148.179, "eval_steps_per_second": 18.522, "eval_uas": 34.04592240696754, "step": 3000 }, { "epoch": 230.77, "step": 3000, "total_flos": 1.541530495893504e+16, "train_loss": 0.6324253260294597, "train_runtime": 1509.7376, "train_samples_per_second": 317.936, "train_steps_per_second": 9.936 } ], "max_steps": 15000, "num_train_epochs": 1154, "total_flos": 1.541530495893504e+16, "trial_name": null, "trial_params": null }