{ "best_metric": 82.59496169943931, "best_model_checkpoint": "/scratch/mrahma45/pixel/finetuned_models/mbert/bert-base-finetuned-parsing-ud-Chinese-GSD/checkpoint-2000", "epoch": 36.0, "global_step": 4500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.8, "learning_rate": 7.840000000000001e-05, "loss": 3.3926, "step": 100 }, { "epoch": 1.6, "learning_rate": 7.947919463087248e-05, "loss": 1.027, "step": 200 }, { "epoch": 2.4, "learning_rate": 7.894228187919463e-05, "loss": 0.6817, "step": 300 }, { "epoch": 3.2, "learning_rate": 7.840536912751678e-05, "loss": 0.5063, "step": 400 }, { "epoch": 4.0, "learning_rate": 7.786845637583893e-05, "loss": 0.367, "step": 500 }, { "epoch": 4.0, "eval_las": 81.48148148148148, "eval_loss": 0.8465050458908081, "eval_runtime": 3.7622, "eval_samples_per_second": 132.902, "eval_steps_per_second": 16.746, "eval_uas": 85.58003632630499, "step": 500 }, { "epoch": 4.8, "learning_rate": 7.733154362416108e-05, "loss": 0.259, "step": 600 }, { "epoch": 5.6, "learning_rate": 7.679463087248322e-05, "loss": 0.2121, "step": 700 }, { "epoch": 6.4, "learning_rate": 7.625771812080537e-05, "loss": 0.1829, "step": 800 }, { "epoch": 7.2, "learning_rate": 7.572080536912752e-05, "loss": 0.1592, "step": 900 }, { "epoch": 8.0, "learning_rate": 7.518389261744967e-05, "loss": 0.135, "step": 1000 }, { "epoch": 8.0, "eval_las": 82.23959567243149, "eval_loss": 1.149000883102417, "eval_runtime": 3.7521, "eval_samples_per_second": 133.26, "eval_steps_per_second": 16.791, "eval_uas": 86.01437258153676, "step": 1000 }, { "epoch": 8.8, "learning_rate": 7.464697986577182e-05, "loss": 0.1142, "step": 1100 }, { "epoch": 9.6, "learning_rate": 7.411006711409397e-05, "loss": 0.1008, "step": 1200 }, { "epoch": 10.4, "learning_rate": 7.357315436241611e-05, "loss": 0.0923, "step": 1300 }, { "epoch": 11.2, "learning_rate": 7.303624161073826e-05, "loss": 0.0833, "step": 1400 }, { "epoch": 12.0, "learning_rate": 7.249932885906041e-05, "loss": 0.0804, "step": 1500 }, { "epoch": 12.0, "eval_las": 81.89212666824606, "eval_loss": 1.3434501886367798, "eval_runtime": 3.7531, "eval_samples_per_second": 133.225, "eval_steps_per_second": 16.786, "eval_uas": 85.7458738055753, "step": 1500 }, { "epoch": 12.8, "learning_rate": 7.196241610738256e-05, "loss": 0.0703, "step": 1600 }, { "epoch": 13.6, "learning_rate": 7.142550335570471e-05, "loss": 0.0663, "step": 1700 }, { "epoch": 14.4, "learning_rate": 7.088859060402686e-05, "loss": 0.0638, "step": 1800 }, { "epoch": 15.2, "learning_rate": 7.0351677852349e-05, "loss": 0.0564, "step": 1900 }, { "epoch": 16.0, "learning_rate": 6.981476510067114e-05, "loss": 0.0593, "step": 2000 }, { "epoch": 16.0, "eval_las": 82.59496169943931, "eval_loss": 1.4142358303070068, "eval_runtime": 3.7723, "eval_samples_per_second": 132.545, "eval_steps_per_second": 16.701, "eval_uas": 86.22759219774146, "step": 2000 }, { "epoch": 16.8, "learning_rate": 6.927785234899329e-05, "loss": 0.0506, "step": 2100 }, { "epoch": 17.6, "learning_rate": 6.874093959731543e-05, "loss": 0.0486, "step": 2200 }, { "epoch": 18.4, "learning_rate": 6.820402684563758e-05, "loss": 0.0481, "step": 2300 }, { "epoch": 19.2, "learning_rate": 6.766711409395973e-05, "loss": 0.0433, "step": 2400 }, { "epoch": 20.0, "learning_rate": 6.713020134228188e-05, "loss": 0.0425, "step": 2500 }, { "epoch": 20.0, "eval_las": 81.96319987364762, "eval_loss": 1.5313485860824585, "eval_runtime": 3.7514, "eval_samples_per_second": 133.284, "eval_steps_per_second": 16.794, "eval_uas": 85.66690357735133, "step": 2500 }, { "epoch": 20.8, "learning_rate": 6.659328859060403e-05, "loss": 0.0427, "step": 2600 }, { "epoch": 21.6, "learning_rate": 6.605637583892618e-05, "loss": 0.0386, "step": 2700 }, { "epoch": 22.4, "learning_rate": 6.551946308724832e-05, "loss": 0.0384, "step": 2800 }, { "epoch": 23.2, "learning_rate": 6.498255033557047e-05, "loss": 0.0329, "step": 2900 }, { "epoch": 24.0, "learning_rate": 6.444563758389262e-05, "loss": 0.0367, "step": 3000 }, { "epoch": 24.0, "eval_las": 82.53178551686014, "eval_loss": 1.592229962348938, "eval_runtime": 3.6848, "eval_samples_per_second": 135.693, "eval_steps_per_second": 17.097, "eval_uas": 86.06965174129353, "step": 3000 }, { "epoch": 24.8, "learning_rate": 6.390872483221477e-05, "loss": 0.0318, "step": 3100 }, { "epoch": 25.6, "learning_rate": 6.337181208053692e-05, "loss": 0.0335, "step": 3200 }, { "epoch": 26.4, "learning_rate": 6.283489932885907e-05, "loss": 0.033, "step": 3300 }, { "epoch": 27.2, "learning_rate": 6.229798657718121e-05, "loss": 0.0305, "step": 3400 }, { "epoch": 28.0, "learning_rate": 6.176107382550336e-05, "loss": 0.0286, "step": 3500 }, { "epoch": 28.0, "eval_las": 82.16852246702993, "eval_loss": 1.742520809173584, "eval_runtime": 3.6834, "eval_samples_per_second": 135.745, "eval_steps_per_second": 17.104, "eval_uas": 85.96699044460239, "step": 3500 }, { "epoch": 28.8, "learning_rate": 6.122416107382551e-05, "loss": 0.0269, "step": 3600 }, { "epoch": 29.6, "learning_rate": 6.068724832214766e-05, "loss": 0.0242, "step": 3700 }, { "epoch": 30.4, "learning_rate": 6.015033557046981e-05, "loss": 0.0273, "step": 3800 }, { "epoch": 31.2, "learning_rate": 5.9613422818791955e-05, "loss": 0.0244, "step": 3900 }, { "epoch": 32.0, "learning_rate": 5.90765100671141e-05, "loss": 0.0254, "step": 4000 }, { "epoch": 32.0, "eval_las": 81.81315644002211, "eval_loss": 1.792807698249817, "eval_runtime": 3.6847, "eval_samples_per_second": 135.698, "eval_steps_per_second": 17.098, "eval_uas": 85.65110953170655, "step": 4000 }, { "epoch": 32.8, "learning_rate": 5.853959731543625e-05, "loss": 0.0234, "step": 4100 }, { "epoch": 33.6, "learning_rate": 5.80026845637584e-05, "loss": 0.0228, "step": 4200 }, { "epoch": 34.4, "learning_rate": 5.7465771812080534e-05, "loss": 0.0219, "step": 4300 }, { "epoch": 35.2, "learning_rate": 5.692885906040268e-05, "loss": 0.0204, "step": 4400 }, { "epoch": 36.0, "learning_rate": 5.639194630872483e-05, "loss": 0.0238, "step": 4500 }, { "epoch": 36.0, "eval_las": 81.94740582800284, "eval_loss": 1.849847435951233, "eval_runtime": 3.6813, "eval_samples_per_second": 135.822, "eval_steps_per_second": 17.114, "eval_uas": 85.52475716654821, "step": 4500 }, { "epoch": 36.0, "step": 4500, "total_flos": 2.4026636277633024e+16, "train_loss": 0.1895591730541653, "train_runtime": 2603.8023, "train_samples_per_second": 184.346, "train_steps_per_second": 5.761 } ], "max_steps": 15000, "num_train_epochs": 120, "total_flos": 2.4026636277633024e+16, "trial_name": null, "trial_params": null }