{ "best_metric": 0.9572671015219937, "best_model_checkpoint": "/scratch/mrahma45/pixel/finetuned_models/mbert/mbert-base-finetuned-pos-ud-Korean-GSD/checkpoint-500", "epoch": 21.73913043478261, "global_step": 3000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.72, "learning_rate": 7.840000000000001e-05, "loss": 0.9879, "step": 100 }, { "epoch": 1.45, "learning_rate": 7.947382550335571e-05, "loss": 0.2181, "step": 200 }, { "epoch": 2.17, "learning_rate": 7.893691275167786e-05, "loss": 0.1495, "step": 300 }, { "epoch": 2.9, "learning_rate": 7.840000000000001e-05, "loss": 0.1132, "step": 400 }, { "epoch": 3.62, "learning_rate": 7.786308724832216e-05, "loss": 0.0799, "step": 500 }, { "epoch": 3.62, "eval_accuracy": 0.9572671015219937, "eval_loss": 0.18942660093307495, "eval_runtime": 4.8972, "eval_samples_per_second": 193.989, "eval_steps_per_second": 24.3, "step": 500 }, { "epoch": 4.35, "learning_rate": 7.73261744966443e-05, "loss": 0.0661, "step": 600 }, { "epoch": 5.07, "learning_rate": 7.678926174496645e-05, "loss": 0.0532, "step": 700 }, { "epoch": 5.8, "learning_rate": 7.62523489932886e-05, "loss": 0.0401, "step": 800 }, { "epoch": 6.52, "learning_rate": 7.571543624161075e-05, "loss": 0.0368, "step": 900 }, { "epoch": 7.25, "learning_rate": 7.51785234899329e-05, "loss": 0.0331, "step": 1000 }, { "epoch": 7.25, "eval_accuracy": 0.9542565646429169, "eval_loss": 0.23469237983226776, "eval_runtime": 4.9192, "eval_samples_per_second": 193.122, "eval_steps_per_second": 24.191, "step": 1000 }, { "epoch": 7.97, "learning_rate": 7.464161073825505e-05, "loss": 0.0279, "step": 1100 }, { "epoch": 8.7, "learning_rate": 7.410469798657718e-05, "loss": 0.0276, "step": 1200 }, { "epoch": 9.42, "learning_rate": 7.356778523489933e-05, "loss": 0.0239, "step": 1300 }, { "epoch": 10.14, "learning_rate": 7.303087248322148e-05, "loss": 0.0211, "step": 1400 }, { "epoch": 10.87, "learning_rate": 7.249395973154363e-05, "loss": 0.019, "step": 1500 }, { "epoch": 10.87, "eval_accuracy": 0.9547583207894297, "eval_loss": 0.2637677788734436, "eval_runtime": 4.9181, "eval_samples_per_second": 193.163, "eval_steps_per_second": 24.196, "step": 1500 }, { "epoch": 11.59, "learning_rate": 7.195704697986577e-05, "loss": 0.0174, "step": 1600 }, { "epoch": 12.32, "learning_rate": 7.142013422818792e-05, "loss": 0.0178, "step": 1700 }, { "epoch": 13.04, "learning_rate": 7.088322147651007e-05, "loss": 0.0181, "step": 1800 }, { "epoch": 13.77, "learning_rate": 7.034630872483222e-05, "loss": 0.014, "step": 1900 }, { "epoch": 14.49, "learning_rate": 6.980939597315437e-05, "loss": 0.0144, "step": 2000 }, { "epoch": 14.49, "eval_accuracy": 0.9535875564475664, "eval_loss": 0.2897445857524872, "eval_runtime": 4.9125, "eval_samples_per_second": 193.386, "eval_steps_per_second": 24.224, "step": 2000 }, { "epoch": 15.22, "learning_rate": 6.927248322147651e-05, "loss": 0.0154, "step": 2100 }, { "epoch": 15.94, "learning_rate": 6.873557046979866e-05, "loss": 0.0133, "step": 2200 }, { "epoch": 16.67, "learning_rate": 6.819865771812081e-05, "loss": 0.0142, "step": 2300 }, { "epoch": 17.39, "learning_rate": 6.766174496644296e-05, "loss": 0.0132, "step": 2400 }, { "epoch": 18.12, "learning_rate": 6.712483221476511e-05, "loss": 0.013, "step": 2500 }, { "epoch": 18.12, "eval_accuracy": 0.9560127111557116, "eval_loss": 0.2839347720146179, "eval_runtime": 4.9076, "eval_samples_per_second": 193.578, "eval_steps_per_second": 24.248, "step": 2500 }, { "epoch": 18.84, "learning_rate": 6.658791946308726e-05, "loss": 0.0114, "step": 2600 }, { "epoch": 19.57, "learning_rate": 6.60510067114094e-05, "loss": 0.0106, "step": 2700 }, { "epoch": 20.29, "learning_rate": 6.551409395973155e-05, "loss": 0.0103, "step": 2800 }, { "epoch": 21.01, "learning_rate": 6.49771812080537e-05, "loss": 0.0123, "step": 2900 }, { "epoch": 21.74, "learning_rate": 6.444026845637585e-05, "loss": 0.0093, "step": 3000 }, { "epoch": 21.74, "eval_accuracy": 0.9540056865696604, "eval_loss": 0.2998155355453491, "eval_runtime": 4.8962, "eval_samples_per_second": 194.026, "eval_steps_per_second": 24.304, "step": 3000 }, { "epoch": 21.74, "step": 3000, "total_flos": 1.2500041419988992e+16, "train_loss": 0.0700680200457573, "train_runtime": 844.3355, "train_samples_per_second": 568.494, "train_steps_per_second": 17.765 } ], "max_steps": 15000, "num_train_epochs": 109, "total_flos": 1.2500041419988992e+16, "trial_name": null, "trial_params": null }