{ "best_metric": 0.5861960041064519, "best_model_checkpoint": "/scratch/mrahma45/pixel/finetuned_models/bert/bert-base-finetuned-pos-ud-Chinese-GSD/checkpoint-500", "epoch": 24.0, "global_step": 3000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.8, "learning_rate": 4.9500000000000004e-05, "loss": 1.8173, "step": 100 }, { "epoch": 1.6, "learning_rate": 4.966778523489933e-05, "loss": 1.3257, "step": 200 }, { "epoch": 2.4, "learning_rate": 4.933221476510068e-05, "loss": 1.2541, "step": 300 }, { "epoch": 3.2, "learning_rate": 4.8996644295302016e-05, "loss": 1.2279, "step": 400 }, { "epoch": 4.0, "learning_rate": 4.8661073825503355e-05, "loss": 1.1842, "step": 500 }, { "epoch": 4.0, "eval_accuracy": 0.5861960041064519, "eval_loss": 1.2214140892028809, "eval_runtime": 2.5478, "eval_samples_per_second": 196.251, "eval_steps_per_second": 24.728, "step": 500 }, { "epoch": 4.8, "learning_rate": 4.83255033557047e-05, "loss": 1.1457, "step": 600 }, { "epoch": 5.6, "learning_rate": 4.798993288590604e-05, "loss": 1.1089, "step": 700 }, { "epoch": 6.4, "learning_rate": 4.765436241610739e-05, "loss": 1.0922, "step": 800 }, { "epoch": 7.2, "learning_rate": 4.7318791946308726e-05, "loss": 1.0397, "step": 900 }, { "epoch": 8.0, "learning_rate": 4.698322147651007e-05, "loss": 1.0256, "step": 1000 }, { "epoch": 8.0, "eval_accuracy": 0.5806680881307748, "eval_loss": 1.2595834732055664, "eval_runtime": 2.5388, "eval_samples_per_second": 196.945, "eval_steps_per_second": 24.815, "step": 1000 }, { "epoch": 8.8, "learning_rate": 4.664765100671141e-05, "loss": 0.9657, "step": 1100 }, { "epoch": 9.6, "learning_rate": 4.631208053691276e-05, "loss": 0.9376, "step": 1200 }, { "epoch": 10.4, "learning_rate": 4.5976510067114097e-05, "loss": 0.901, "step": 1300 }, { "epoch": 11.2, "learning_rate": 4.564093959731544e-05, "loss": 0.8583, "step": 1400 }, { "epoch": 12.0, "learning_rate": 4.5305369127516775e-05, "loss": 0.832, "step": 1500 }, { "epoch": 12.0, "eval_accuracy": 0.5698491668640923, "eval_loss": 1.4014437198638916, "eval_runtime": 2.5467, "eval_samples_per_second": 196.333, "eval_steps_per_second": 24.738, "step": 1500 }, { "epoch": 12.8, "learning_rate": 4.496979865771812e-05, "loss": 0.7809, "step": 1600 }, { "epoch": 13.6, "learning_rate": 4.463422818791946e-05, "loss": 0.7387, "step": 1700 }, { "epoch": 14.4, "learning_rate": 4.4298657718120806e-05, "loss": 0.7135, "step": 1800 }, { "epoch": 15.2, "learning_rate": 4.3963087248322146e-05, "loss": 0.6757, "step": 1900 }, { "epoch": 16.0, "learning_rate": 4.362751677852349e-05, "loss": 0.6519, "step": 2000 }, { "epoch": 16.0, "eval_accuracy": 0.5552396746426597, "eval_loss": 1.6267313957214355, "eval_runtime": 2.5372, "eval_samples_per_second": 197.067, "eval_steps_per_second": 24.83, "step": 2000 }, { "epoch": 16.8, "learning_rate": 4.329194630872484e-05, "loss": 0.6038, "step": 2100 }, { "epoch": 17.6, "learning_rate": 4.295637583892618e-05, "loss": 0.5758, "step": 2200 }, { "epoch": 18.4, "learning_rate": 4.262080536912752e-05, "loss": 0.545, "step": 2300 }, { "epoch": 19.2, "learning_rate": 4.228523489932886e-05, "loss": 0.5239, "step": 2400 }, { "epoch": 20.0, "learning_rate": 4.194966442953021e-05, "loss": 0.5023, "step": 2500 }, { "epoch": 20.0, "eval_accuracy": 0.550817341862118, "eval_loss": 1.8601970672607422, "eval_runtime": 2.5314, "eval_samples_per_second": 197.52, "eval_steps_per_second": 24.887, "step": 2500 }, { "epoch": 20.8, "learning_rate": 4.161409395973155e-05, "loss": 0.4627, "step": 2600 }, { "epoch": 21.6, "learning_rate": 4.127852348993289e-05, "loss": 0.4396, "step": 2700 }, { "epoch": 22.4, "learning_rate": 4.0942953020134226e-05, "loss": 0.4187, "step": 2800 }, { "epoch": 23.2, "learning_rate": 4.060738255033557e-05, "loss": 0.4056, "step": 2900 }, { "epoch": 24.0, "learning_rate": 4.027181208053691e-05, "loss": 0.3811, "step": 3000 }, { "epoch": 24.0, "eval_accuracy": 0.5463160388533523, "eval_loss": 2.0793230533599854, "eval_runtime": 2.5286, "eval_samples_per_second": 197.741, "eval_steps_per_second": 24.915, "step": 3000 }, { "epoch": 24.0, "step": 3000, "total_flos": 1.2534537269366784e+16, "train_loss": 0.8378373018900553, "train_runtime": 774.849, "train_samples_per_second": 619.476, "train_steps_per_second": 19.359 } ], "max_steps": 15000, "num_train_epochs": 120, "total_flos": 1.2534537269366784e+16, "trial_name": null, "trial_params": null }