Md Mushfiqur Rahman
Upload with huggingface_hub
3bf5c3f
raw
history blame
5.84 kB
{
"best_metric": 0.9572671015219937,
"best_model_checkpoint": "/scratch/mrahma45/pixel/finetuned_models/mbert/mbert-base-finetuned-pos-ud-Korean-GSD/checkpoint-500",
"epoch": 21.73913043478261,
"global_step": 3000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.72,
"learning_rate": 7.840000000000001e-05,
"loss": 0.9879,
"step": 100
},
{
"epoch": 1.45,
"learning_rate": 7.947382550335571e-05,
"loss": 0.2181,
"step": 200
},
{
"epoch": 2.17,
"learning_rate": 7.893691275167786e-05,
"loss": 0.1495,
"step": 300
},
{
"epoch": 2.9,
"learning_rate": 7.840000000000001e-05,
"loss": 0.1132,
"step": 400
},
{
"epoch": 3.62,
"learning_rate": 7.786308724832216e-05,
"loss": 0.0799,
"step": 500
},
{
"epoch": 3.62,
"eval_accuracy": 0.9572671015219937,
"eval_loss": 0.18942660093307495,
"eval_runtime": 4.8972,
"eval_samples_per_second": 193.989,
"eval_steps_per_second": 24.3,
"step": 500
},
{
"epoch": 4.35,
"learning_rate": 7.73261744966443e-05,
"loss": 0.0661,
"step": 600
},
{
"epoch": 5.07,
"learning_rate": 7.678926174496645e-05,
"loss": 0.0532,
"step": 700
},
{
"epoch": 5.8,
"learning_rate": 7.62523489932886e-05,
"loss": 0.0401,
"step": 800
},
{
"epoch": 6.52,
"learning_rate": 7.571543624161075e-05,
"loss": 0.0368,
"step": 900
},
{
"epoch": 7.25,
"learning_rate": 7.51785234899329e-05,
"loss": 0.0331,
"step": 1000
},
{
"epoch": 7.25,
"eval_accuracy": 0.9542565646429169,
"eval_loss": 0.23469237983226776,
"eval_runtime": 4.9192,
"eval_samples_per_second": 193.122,
"eval_steps_per_second": 24.191,
"step": 1000
},
{
"epoch": 7.97,
"learning_rate": 7.464161073825505e-05,
"loss": 0.0279,
"step": 1100
},
{
"epoch": 8.7,
"learning_rate": 7.410469798657718e-05,
"loss": 0.0276,
"step": 1200
},
{
"epoch": 9.42,
"learning_rate": 7.356778523489933e-05,
"loss": 0.0239,
"step": 1300
},
{
"epoch": 10.14,
"learning_rate": 7.303087248322148e-05,
"loss": 0.0211,
"step": 1400
},
{
"epoch": 10.87,
"learning_rate": 7.249395973154363e-05,
"loss": 0.019,
"step": 1500
},
{
"epoch": 10.87,
"eval_accuracy": 0.9547583207894297,
"eval_loss": 0.2637677788734436,
"eval_runtime": 4.9181,
"eval_samples_per_second": 193.163,
"eval_steps_per_second": 24.196,
"step": 1500
},
{
"epoch": 11.59,
"learning_rate": 7.195704697986577e-05,
"loss": 0.0174,
"step": 1600
},
{
"epoch": 12.32,
"learning_rate": 7.142013422818792e-05,
"loss": 0.0178,
"step": 1700
},
{
"epoch": 13.04,
"learning_rate": 7.088322147651007e-05,
"loss": 0.0181,
"step": 1800
},
{
"epoch": 13.77,
"learning_rate": 7.034630872483222e-05,
"loss": 0.014,
"step": 1900
},
{
"epoch": 14.49,
"learning_rate": 6.980939597315437e-05,
"loss": 0.0144,
"step": 2000
},
{
"epoch": 14.49,
"eval_accuracy": 0.9535875564475664,
"eval_loss": 0.2897445857524872,
"eval_runtime": 4.9125,
"eval_samples_per_second": 193.386,
"eval_steps_per_second": 24.224,
"step": 2000
},
{
"epoch": 15.22,
"learning_rate": 6.927248322147651e-05,
"loss": 0.0154,
"step": 2100
},
{
"epoch": 15.94,
"learning_rate": 6.873557046979866e-05,
"loss": 0.0133,
"step": 2200
},
{
"epoch": 16.67,
"learning_rate": 6.819865771812081e-05,
"loss": 0.0142,
"step": 2300
},
{
"epoch": 17.39,
"learning_rate": 6.766174496644296e-05,
"loss": 0.0132,
"step": 2400
},
{
"epoch": 18.12,
"learning_rate": 6.712483221476511e-05,
"loss": 0.013,
"step": 2500
},
{
"epoch": 18.12,
"eval_accuracy": 0.9560127111557116,
"eval_loss": 0.2839347720146179,
"eval_runtime": 4.9076,
"eval_samples_per_second": 193.578,
"eval_steps_per_second": 24.248,
"step": 2500
},
{
"epoch": 18.84,
"learning_rate": 6.658791946308726e-05,
"loss": 0.0114,
"step": 2600
},
{
"epoch": 19.57,
"learning_rate": 6.60510067114094e-05,
"loss": 0.0106,
"step": 2700
},
{
"epoch": 20.29,
"learning_rate": 6.551409395973155e-05,
"loss": 0.0103,
"step": 2800
},
{
"epoch": 21.01,
"learning_rate": 6.49771812080537e-05,
"loss": 0.0123,
"step": 2900
},
{
"epoch": 21.74,
"learning_rate": 6.444026845637585e-05,
"loss": 0.0093,
"step": 3000
},
{
"epoch": 21.74,
"eval_accuracy": 0.9540056865696604,
"eval_loss": 0.2998155355453491,
"eval_runtime": 4.8962,
"eval_samples_per_second": 194.026,
"eval_steps_per_second": 24.304,
"step": 3000
},
{
"epoch": 21.74,
"step": 3000,
"total_flos": 1.2500041419988992e+16,
"train_loss": 0.0700680200457573,
"train_runtime": 844.3355,
"train_samples_per_second": 568.494,
"train_steps_per_second": 17.765
}
],
"max_steps": 15000,
"num_train_epochs": 109,
"total_flos": 1.2500041419988992e+16,
"trial_name": null,
"trial_params": null
}