Md Mushfiqur Rahman
Upload with huggingface_hub
07a7d55
{
"best_metric": 82.59496169943931,
"best_model_checkpoint": "/scratch/mrahma45/pixel/finetuned_models/mbert/bert-base-finetuned-parsing-ud-Chinese-GSD/checkpoint-2000",
"epoch": 36.0,
"global_step": 4500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.8,
"learning_rate": 7.840000000000001e-05,
"loss": 3.3926,
"step": 100
},
{
"epoch": 1.6,
"learning_rate": 7.947919463087248e-05,
"loss": 1.027,
"step": 200
},
{
"epoch": 2.4,
"learning_rate": 7.894228187919463e-05,
"loss": 0.6817,
"step": 300
},
{
"epoch": 3.2,
"learning_rate": 7.840536912751678e-05,
"loss": 0.5063,
"step": 400
},
{
"epoch": 4.0,
"learning_rate": 7.786845637583893e-05,
"loss": 0.367,
"step": 500
},
{
"epoch": 4.0,
"eval_las": 81.48148148148148,
"eval_loss": 0.8465050458908081,
"eval_runtime": 3.7622,
"eval_samples_per_second": 132.902,
"eval_steps_per_second": 16.746,
"eval_uas": 85.58003632630499,
"step": 500
},
{
"epoch": 4.8,
"learning_rate": 7.733154362416108e-05,
"loss": 0.259,
"step": 600
},
{
"epoch": 5.6,
"learning_rate": 7.679463087248322e-05,
"loss": 0.2121,
"step": 700
},
{
"epoch": 6.4,
"learning_rate": 7.625771812080537e-05,
"loss": 0.1829,
"step": 800
},
{
"epoch": 7.2,
"learning_rate": 7.572080536912752e-05,
"loss": 0.1592,
"step": 900
},
{
"epoch": 8.0,
"learning_rate": 7.518389261744967e-05,
"loss": 0.135,
"step": 1000
},
{
"epoch": 8.0,
"eval_las": 82.23959567243149,
"eval_loss": 1.149000883102417,
"eval_runtime": 3.7521,
"eval_samples_per_second": 133.26,
"eval_steps_per_second": 16.791,
"eval_uas": 86.01437258153676,
"step": 1000
},
{
"epoch": 8.8,
"learning_rate": 7.464697986577182e-05,
"loss": 0.1142,
"step": 1100
},
{
"epoch": 9.6,
"learning_rate": 7.411006711409397e-05,
"loss": 0.1008,
"step": 1200
},
{
"epoch": 10.4,
"learning_rate": 7.357315436241611e-05,
"loss": 0.0923,
"step": 1300
},
{
"epoch": 11.2,
"learning_rate": 7.303624161073826e-05,
"loss": 0.0833,
"step": 1400
},
{
"epoch": 12.0,
"learning_rate": 7.249932885906041e-05,
"loss": 0.0804,
"step": 1500
},
{
"epoch": 12.0,
"eval_las": 81.89212666824606,
"eval_loss": 1.3434501886367798,
"eval_runtime": 3.7531,
"eval_samples_per_second": 133.225,
"eval_steps_per_second": 16.786,
"eval_uas": 85.7458738055753,
"step": 1500
},
{
"epoch": 12.8,
"learning_rate": 7.196241610738256e-05,
"loss": 0.0703,
"step": 1600
},
{
"epoch": 13.6,
"learning_rate": 7.142550335570471e-05,
"loss": 0.0663,
"step": 1700
},
{
"epoch": 14.4,
"learning_rate": 7.088859060402686e-05,
"loss": 0.0638,
"step": 1800
},
{
"epoch": 15.2,
"learning_rate": 7.0351677852349e-05,
"loss": 0.0564,
"step": 1900
},
{
"epoch": 16.0,
"learning_rate": 6.981476510067114e-05,
"loss": 0.0593,
"step": 2000
},
{
"epoch": 16.0,
"eval_las": 82.59496169943931,
"eval_loss": 1.4142358303070068,
"eval_runtime": 3.7723,
"eval_samples_per_second": 132.545,
"eval_steps_per_second": 16.701,
"eval_uas": 86.22759219774146,
"step": 2000
},
{
"epoch": 16.8,
"learning_rate": 6.927785234899329e-05,
"loss": 0.0506,
"step": 2100
},
{
"epoch": 17.6,
"learning_rate": 6.874093959731543e-05,
"loss": 0.0486,
"step": 2200
},
{
"epoch": 18.4,
"learning_rate": 6.820402684563758e-05,
"loss": 0.0481,
"step": 2300
},
{
"epoch": 19.2,
"learning_rate": 6.766711409395973e-05,
"loss": 0.0433,
"step": 2400
},
{
"epoch": 20.0,
"learning_rate": 6.713020134228188e-05,
"loss": 0.0425,
"step": 2500
},
{
"epoch": 20.0,
"eval_las": 81.96319987364762,
"eval_loss": 1.5313485860824585,
"eval_runtime": 3.7514,
"eval_samples_per_second": 133.284,
"eval_steps_per_second": 16.794,
"eval_uas": 85.66690357735133,
"step": 2500
},
{
"epoch": 20.8,
"learning_rate": 6.659328859060403e-05,
"loss": 0.0427,
"step": 2600
},
{
"epoch": 21.6,
"learning_rate": 6.605637583892618e-05,
"loss": 0.0386,
"step": 2700
},
{
"epoch": 22.4,
"learning_rate": 6.551946308724832e-05,
"loss": 0.0384,
"step": 2800
},
{
"epoch": 23.2,
"learning_rate": 6.498255033557047e-05,
"loss": 0.0329,
"step": 2900
},
{
"epoch": 24.0,
"learning_rate": 6.444563758389262e-05,
"loss": 0.0367,
"step": 3000
},
{
"epoch": 24.0,
"eval_las": 82.53178551686014,
"eval_loss": 1.592229962348938,
"eval_runtime": 3.6848,
"eval_samples_per_second": 135.693,
"eval_steps_per_second": 17.097,
"eval_uas": 86.06965174129353,
"step": 3000
},
{
"epoch": 24.8,
"learning_rate": 6.390872483221477e-05,
"loss": 0.0318,
"step": 3100
},
{
"epoch": 25.6,
"learning_rate": 6.337181208053692e-05,
"loss": 0.0335,
"step": 3200
},
{
"epoch": 26.4,
"learning_rate": 6.283489932885907e-05,
"loss": 0.033,
"step": 3300
},
{
"epoch": 27.2,
"learning_rate": 6.229798657718121e-05,
"loss": 0.0305,
"step": 3400
},
{
"epoch": 28.0,
"learning_rate": 6.176107382550336e-05,
"loss": 0.0286,
"step": 3500
},
{
"epoch": 28.0,
"eval_las": 82.16852246702993,
"eval_loss": 1.742520809173584,
"eval_runtime": 3.6834,
"eval_samples_per_second": 135.745,
"eval_steps_per_second": 17.104,
"eval_uas": 85.96699044460239,
"step": 3500
},
{
"epoch": 28.8,
"learning_rate": 6.122416107382551e-05,
"loss": 0.0269,
"step": 3600
},
{
"epoch": 29.6,
"learning_rate": 6.068724832214766e-05,
"loss": 0.0242,
"step": 3700
},
{
"epoch": 30.4,
"learning_rate": 6.015033557046981e-05,
"loss": 0.0273,
"step": 3800
},
{
"epoch": 31.2,
"learning_rate": 5.9613422818791955e-05,
"loss": 0.0244,
"step": 3900
},
{
"epoch": 32.0,
"learning_rate": 5.90765100671141e-05,
"loss": 0.0254,
"step": 4000
},
{
"epoch": 32.0,
"eval_las": 81.81315644002211,
"eval_loss": 1.792807698249817,
"eval_runtime": 3.6847,
"eval_samples_per_second": 135.698,
"eval_steps_per_second": 17.098,
"eval_uas": 85.65110953170655,
"step": 4000
},
{
"epoch": 32.8,
"learning_rate": 5.853959731543625e-05,
"loss": 0.0234,
"step": 4100
},
{
"epoch": 33.6,
"learning_rate": 5.80026845637584e-05,
"loss": 0.0228,
"step": 4200
},
{
"epoch": 34.4,
"learning_rate": 5.7465771812080534e-05,
"loss": 0.0219,
"step": 4300
},
{
"epoch": 35.2,
"learning_rate": 5.692885906040268e-05,
"loss": 0.0204,
"step": 4400
},
{
"epoch": 36.0,
"learning_rate": 5.639194630872483e-05,
"loss": 0.0238,
"step": 4500
},
{
"epoch": 36.0,
"eval_las": 81.94740582800284,
"eval_loss": 1.849847435951233,
"eval_runtime": 3.6813,
"eval_samples_per_second": 135.822,
"eval_steps_per_second": 17.114,
"eval_uas": 85.52475716654821,
"step": 4500
},
{
"epoch": 36.0,
"step": 4500,
"total_flos": 2.4026636277633024e+16,
"train_loss": 0.1895591730541653,
"train_runtime": 2603.8023,
"train_samples_per_second": 184.346,
"train_steps_per_second": 5.761
}
],
"max_steps": 15000,
"num_train_epochs": 120,
"total_flos": 2.4026636277633024e+16,
"trial_name": null,
"trial_params": null
}