|
{
  "best_metric": 82.48871048670347,
  "best_model_checkpoint": "/scratch/mrahma45/pixel/finetuned_models/mbert/bert-base-finetuned-parsing-ud-Korean-GSD/checkpoint-2000",
  "epoch": 32.608695652173914,
  "global_step": 4500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.72,
      "learning_rate": 7.76e-05,
      "loss": 3.3939,
      "step": 100
    },
    {
      "epoch": 1.45,
      "learning_rate": 7.947919463087248e-05,
      "loss": 1.0824,
      "step": 200
    },
    {
      "epoch": 2.17,
      "learning_rate": 7.894228187919463e-05,
      "loss": 0.8005,
      "step": 300
    },
    {
      "epoch": 2.9,
      "learning_rate": 7.840536912751678e-05,
      "loss": 0.5864,
      "step": 400
    },
    {
      "epoch": 3.62,
      "learning_rate": 7.786845637583893e-05,
      "loss": 0.4237,
      "step": 500
    },
    {
      "epoch": 3.62,
      "eval_las": 80.40642247867537,
      "eval_loss": 0.9098409414291382,
      "eval_runtime": 6.054,
      "eval_samples_per_second": 156.921,
      "eval_steps_per_second": 19.656,
      "eval_uas": 84.83860177287171,
      "step": 500
    },
    {
      "epoch": 4.35,
      "learning_rate": 7.733154362416108e-05,
      "loss": 0.3544,
      "step": 600
    },
    {
      "epoch": 5.07,
      "learning_rate": 7.679463087248322e-05,
      "loss": 0.2966,
      "step": 700
    },
    {
      "epoch": 5.8,
      "learning_rate": 7.625771812080537e-05,
      "loss": 0.2265,
      "step": 800
    },
    {
      "epoch": 6.52,
      "learning_rate": 7.572080536912752e-05,
      "loss": 0.1954,
      "step": 900
    },
    {
      "epoch": 7.25,
      "learning_rate": 7.518389261744967e-05,
      "loss": 0.1759,
      "step": 1000
    },
    {
      "epoch": 7.25,
      "eval_las": 81.9284161230975,
      "eval_loss": 1.3141283988952637,
      "eval_runtime": 6.0488,
      "eval_samples_per_second": 157.057,
      "eval_steps_per_second": 19.673,
      "eval_uas": 86.06790433182806,
      "step": 1000
    },
    {
      "epoch": 7.97,
      "learning_rate": 7.464697986577182e-05,
      "loss": 0.1628,
      "step": 1100
    },
    {
      "epoch": 8.7,
      "learning_rate": 7.411006711409397e-05,
      "loss": 0.1266,
      "step": 1200
    },
    {
      "epoch": 9.42,
      "learning_rate": 7.357315436241611e-05,
      "loss": 0.1255,
      "step": 1300
    },
    {
      "epoch": 10.14,
      "learning_rate": 7.303624161073826e-05,
      "loss": 0.1183,
      "step": 1400
    },
    {
      "epoch": 10.87,
      "learning_rate": 7.249932885906041e-05,
      "loss": 0.1053,
      "step": 1500
    },
    {
      "epoch": 10.87,
      "eval_las": 81.93677872553938,
      "eval_loss": 1.3510112762451172,
      "eval_runtime": 6.0457,
      "eval_samples_per_second": 157.137,
      "eval_steps_per_second": 19.684,
      "eval_uas": 86.15989295868874,
      "step": 1500
    },
    {
      "epoch": 11.59,
      "learning_rate": 7.196241610738256e-05,
      "loss": 0.0971,
      "step": 1600
    },
    {
      "epoch": 12.32,
      "learning_rate": 7.142550335570471e-05,
      "loss": 0.0906,
      "step": 1700
    },
    {
      "epoch": 13.04,
      "learning_rate": 7.088859060402686e-05,
      "loss": 0.0805,
      "step": 1800
    },
    {
      "epoch": 13.77,
      "learning_rate": 7.0351677852349e-05,
      "loss": 0.077,
      "step": 1900
    },
    {
      "epoch": 14.49,
      "learning_rate": 6.981476510067114e-05,
      "loss": 0.0688,
      "step": 2000
    },
    {
      "epoch": 14.49,
      "eval_las": 82.48871048670347,
      "eval_loss": 1.5172237157821655,
      "eval_runtime": 6.0437,
      "eval_samples_per_second": 157.189,
      "eval_steps_per_second": 19.69,
      "eval_uas": 86.62819869543402,
      "step": 2000
    },
    {
      "epoch": 15.22,
      "learning_rate": 6.927785234899329e-05,
      "loss": 0.0692,
      "step": 2100
    },
    {
      "epoch": 15.94,
      "learning_rate": 6.874093959731543e-05,
      "loss": 0.0678,
      "step": 2200
    },
    {
      "epoch": 16.67,
      "learning_rate": 6.820402684563758e-05,
      "loss": 0.0587,
      "step": 2300
    },
    {
      "epoch": 17.39,
      "learning_rate": 6.766711409395973e-05,
      "loss": 0.0621,
      "step": 2400
    },
    {
      "epoch": 18.12,
      "learning_rate": 6.713020134228188e-05,
      "loss": 0.0579,
      "step": 2500
    },
    {
      "epoch": 18.12,
      "eval_las": 82.36327145007526,
      "eval_loss": 1.7050881385803223,
      "eval_runtime": 6.0527,
      "eval_samples_per_second": 156.956,
      "eval_steps_per_second": 19.661,
      "eval_uas": 86.70346211741094,
      "step": 2500
    },
    {
      "epoch": 18.84,
      "learning_rate": 6.659328859060403e-05,
      "loss": 0.0518,
      "step": 2600
    },
    {
      "epoch": 19.57,
      "learning_rate": 6.605637583892618e-05,
      "loss": 0.0529,
      "step": 2700
    },
    {
      "epoch": 20.29,
      "learning_rate": 6.551946308724832e-05,
      "loss": 0.0519,
      "step": 2800
    },
    {
      "epoch": 21.01,
      "learning_rate": 6.498255033557047e-05,
      "loss": 0.0491,
      "step": 2900
    },
    {
      "epoch": 21.74,
      "learning_rate": 6.444563758389262e-05,
      "loss": 0.0438,
      "step": 3000
    },
    {
      "epoch": 21.74,
      "eval_las": 82.26292022077271,
      "eval_loss": 1.7979624271392822,
      "eval_runtime": 6.0527,
      "eval_samples_per_second": 156.955,
      "eval_steps_per_second": 19.661,
      "eval_uas": 86.36895801973574,
      "step": 3000
    },
    {
      "epoch": 22.46,
      "learning_rate": 6.390872483221477e-05,
      "loss": 0.045,
      "step": 3100
    },
    {
      "epoch": 23.19,
      "learning_rate": 6.337181208053692e-05,
      "loss": 0.0457,
      "step": 3200
    },
    {
      "epoch": 23.91,
      "learning_rate": 6.284026845637584e-05,
      "loss": 0.0417,
      "step": 3300
    },
    {
      "epoch": 24.64,
      "learning_rate": 6.230335570469799e-05,
      "loss": 0.0381,
      "step": 3400
    },
    {
      "epoch": 25.36,
      "learning_rate": 6.176644295302013e-05,
      "loss": 0.0387,
      "step": 3500
    },
    {
      "epoch": 25.36,
      "eval_las": 81.73607626693426,
      "eval_loss": 1.8726658821105957,
      "eval_runtime": 6.0404,
      "eval_samples_per_second": 157.274,
      "eval_steps_per_second": 19.701,
      "eval_uas": 85.83375146345543,
      "step": 3500
    },
    {
      "epoch": 26.09,
      "learning_rate": 6.122953020134228e-05,
      "loss": 0.038,
      "step": 3600
    },
    {
      "epoch": 26.81,
      "learning_rate": 6.069261744966444e-05,
      "loss": 0.0367,
      "step": 3700
    },
    {
      "epoch": 27.54,
      "learning_rate": 6.0155704697986585e-05,
      "loss": 0.0361,
      "step": 3800
    },
    {
      "epoch": 28.26,
      "learning_rate": 5.9618791946308734e-05,
      "loss": 0.0396,
      "step": 3900
    },
    {
      "epoch": 28.99,
      "learning_rate": 5.9081879194630875e-05,
      "loss": 0.0373,
      "step": 4000
    },
    {
      "epoch": 28.99,
      "eval_las": 82.27964542565647,
      "eval_loss": 1.884783387184143,
      "eval_runtime": 6.0511,
      "eval_samples_per_second": 156.995,
      "eval_steps_per_second": 19.666,
      "eval_uas": 86.27696939287506,
      "step": 4000
    },
    {
      "epoch": 29.71,
      "learning_rate": 5.854496644295302e-05,
      "loss": 0.0296,
      "step": 4100
    },
    {
      "epoch": 30.43,
      "learning_rate": 5.800805369127517e-05,
      "loss": 0.0291,
      "step": 4200
    },
    {
      "epoch": 31.16,
      "learning_rate": 5.747114093959732e-05,
      "loss": 0.0301,
      "step": 4300
    },
    {
      "epoch": 31.88,
      "learning_rate": 5.693422818791947e-05,
      "loss": 0.0321,
      "step": 4400
    },
    {
      "epoch": 32.61,
      "learning_rate": 5.6397315436241616e-05,
      "loss": 0.0281,
      "step": 4500
    },
    {
      "epoch": 32.61,
      "eval_las": 82.12075597926075,
      "eval_loss": 2.1597061157226562,
      "eval_runtime": 6.0549,
      "eval_samples_per_second": 156.896,
      "eval_steps_per_second": 19.653,
      "eval_uas": 85.9675531025255,
      "step": 4500
    },
    {
      "epoch": 32.61,
      "step": 4500,
      "total_flos": 2.3959177620750336e+16,
      "train_loss": 0.21554310941696167,
      "train_runtime": 2399.8621,
      "train_samples_per_second": 200.011,
      "train_steps_per_second": 6.25
    }
  ],
  "max_steps": 15000,
  "num_train_epochs": 109,
  "total_flos": 2.3959177620750336e+16,
  "trial_name": null,
  "trial_params": null
}