{ "best_metric": 0.6053687907676869, "best_model_checkpoint": "/scratch/mrahma45/pixel/finetuned_models/bert/bert-base-finetuned-pos-ud-Korean-GSD/checkpoint-500", "epoch": 21.73913043478261, "global_step": 3000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.72, "learning_rate": 4.9e-05, "loss": 1.5142, "step": 100 }, { "epoch": 1.45, "learning_rate": 4.967114093959731e-05, "loss": 1.0614, "step": 200 }, { "epoch": 2.17, "learning_rate": 4.933557046979866e-05, "loss": 1.042, "step": 300 }, { "epoch": 2.9, "learning_rate": 4.9e-05, "loss": 1.0138, "step": 400 }, { "epoch": 3.62, "learning_rate": 4.8664429530201344e-05, "loss": 1.0024, "step": 500 }, { "epoch": 3.62, "eval_accuracy": 0.6053687907676869, "eval_loss": 1.007150650024414, "eval_runtime": 4.8522, "eval_samples_per_second": 195.787, "eval_steps_per_second": 24.525, "step": 500 }, { "epoch": 4.35, "learning_rate": 4.8328859060402684e-05, "loss": 1.0071, "step": 600 }, { "epoch": 5.07, "learning_rate": 4.799328859060403e-05, "loss": 0.9899, "step": 700 }, { "epoch": 5.8, "learning_rate": 4.7657718120805376e-05, "loss": 0.9818, "step": 800 }, { "epoch": 6.52, "learning_rate": 4.7322147651006715e-05, "loss": 0.9812, "step": 900 }, { "epoch": 7.25, "learning_rate": 4.698657718120806e-05, "loss": 0.9736, "step": 1000 }, { "epoch": 7.25, "eval_accuracy": 0.6013547415955846, "eval_loss": 1.0191282033920288, "eval_runtime": 4.8652, "eval_samples_per_second": 195.263, "eval_steps_per_second": 24.459, "step": 1000 }, { "epoch": 7.97, "learning_rate": 4.66510067114094e-05, "loss": 0.9626, "step": 1100 }, { "epoch": 8.7, "learning_rate": 4.631543624161074e-05, "loss": 0.9428, "step": 1200 }, { "epoch": 9.42, "learning_rate": 4.597986577181208e-05, "loss": 0.9431, "step": 1300 }, { "epoch": 10.14, "learning_rate": 4.5644295302013425e-05, "loss": 0.9438, "step": 1400 }, { "epoch": 10.87, "learning_rate": 4.5308724832214764e-05, "loss": 0.9281, "step": 1500 }, { "epoch": 10.87, "eval_accuracy": 0.6036126442548921, "eval_loss": 1.0334581136703491, "eval_runtime": 4.8675, "eval_samples_per_second": 195.172, "eval_steps_per_second": 24.448, "step": 1500 }, { "epoch": 11.59, "learning_rate": 4.497315436241611e-05, "loss": 0.9, "step": 1600 }, { "epoch": 12.32, "learning_rate": 4.463758389261745e-05, "loss": 0.9157, "step": 1700 }, { "epoch": 13.04, "learning_rate": 4.4302013422818796e-05, "loss": 0.8991, "step": 1800 }, { "epoch": 13.77, "learning_rate": 4.3966442953020135e-05, "loss": 0.8891, "step": 1900 }, { "epoch": 14.49, "learning_rate": 4.363087248322148e-05, "loss": 0.8694, "step": 2000 }, { "epoch": 14.49, "eval_accuracy": 0.5921558789095166, "eval_loss": 1.0879769325256348, "eval_runtime": 4.8603, "eval_samples_per_second": 195.461, "eval_steps_per_second": 24.484, "step": 2000 }, { "epoch": 15.22, "learning_rate": 4.329530201342282e-05, "loss": 0.8726, "step": 2100 }, { "epoch": 15.94, "learning_rate": 4.2959731543624166e-05, "loss": 0.8605, "step": 2200 }, { "epoch": 16.67, "learning_rate": 4.2624161073825505e-05, "loss": 0.8449, "step": 2300 }, { "epoch": 17.39, "learning_rate": 4.2288590604026845e-05, "loss": 0.8285, "step": 2400 }, { "epoch": 18.12, "learning_rate": 4.195302013422819e-05, "loss": 0.8418, "step": 2500 }, { "epoch": 18.12, "eval_accuracy": 0.5876400735909015, "eval_loss": 1.1507753133773804, "eval_runtime": 4.8677, "eval_samples_per_second": 195.164, "eval_steps_per_second": 24.447, "step": 2500 }, { "epoch": 18.84, "learning_rate": 4.161744966442953e-05, "loss": 0.8277, "step": 2600 }, { "epoch": 19.57, "learning_rate": 4.1281879194630876e-05, "loss": 0.8069, "step": 2700 }, { "epoch": 20.29, "learning_rate": 4.0946308724832215e-05, "loss": 0.7859, "step": 2800 }, { "epoch": 21.01, "learning_rate": 4.061073825503356e-05, "loss": 0.8126, "step": 2900 }, { "epoch": 21.74, "learning_rate": 4.02751677852349e-05, "loss": 0.7752, "step": 3000 }, { "epoch": 21.74, "eval_accuracy": 0.5837096504432179, "eval_loss": 1.1938718557357788, "eval_runtime": 4.8673, "eval_samples_per_second": 195.178, "eval_steps_per_second": 24.449, "step": 3000 }, { "epoch": 21.74, "step": 3000, "total_flos": 1.2500041419988992e+16, "train_loss": 0.9339236297607422, "train_runtime": 790.8493, "train_samples_per_second": 606.942, "train_steps_per_second": 18.967 } ], "max_steps": 15000, "num_train_epochs": 109, "total_flos": 1.2500041419988992e+16, "trial_name": null, "trial_params": null }