{ "best_metric": 0.8056872037914692, "best_model_checkpoint": "/scratch/mrahma45/pixel/finetuned_models/mbert/mbert-base-finetuned-masakhaner-wol/checkpoint-2000", "epoch": 91.52542372881356, "global_step": 5400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 3.39, "eval_accuracy_score": 0.9890510948905109, "eval_f1": 0.7586206896551724, "eval_loss": 0.06563546508550644, "eval_precision": 0.8191489361702128, "eval_recall": 0.7064220183486238, "eval_runtime": 3.7154, "eval_samples_per_second": 71.864, "eval_steps_per_second": 9.151, "step": 200 }, { "epoch": 6.78, "eval_accuracy_score": 0.989507299270073, "eval_f1": 0.8076923076923077, "eval_loss": 0.07390450686216354, "eval_precision": 0.8484848484848485, "eval_recall": 0.7706422018348624, "eval_runtime": 3.7144, "eval_samples_per_second": 71.883, "eval_steps_per_second": 9.154, "step": 400 }, { "epoch": 8.47, "learning_rate": 4.865771812080537e-05, "loss": 0.0928, "step": 500 }, { "epoch": 10.17, "eval_accuracy_score": 0.989507299270073, "eval_f1": 0.7926267281105991, "eval_loss": 0.0707671046257019, "eval_precision": 0.7962962962962963, "eval_recall": 0.7889908256880734, "eval_runtime": 3.7124, "eval_samples_per_second": 71.921, "eval_steps_per_second": 9.159, "step": 600 }, { "epoch": 13.56, "eval_accuracy_score": 0.9890510948905109, "eval_f1": 0.7924528301886793, "eval_loss": 0.08138999342918396, "eval_precision": 0.8155339805825242, "eval_recall": 0.7706422018348624, "eval_runtime": 3.7153, "eval_samples_per_second": 71.864, "eval_steps_per_second": 9.151, "step": 800 }, { "epoch": 16.95, "learning_rate": 4.697986577181208e-05, "loss": 0.0031, "step": 1000 }, { "epoch": 16.95, "eval_accuracy_score": 0.9879105839416058, "eval_f1": 0.7555555555555554, "eval_loss": 0.09191697090864182, "eval_precision": 0.7327586206896551, "eval_recall": 0.7798165137614679, "eval_runtime": 3.7121, "eval_samples_per_second": 71.926, "eval_steps_per_second": 9.159, "step": 1000 }, { "epoch": 20.34, "eval_accuracy_score": 0.9890510948905109, "eval_f1": 0.7927927927927928, "eval_loss": 0.08485409617424011, "eval_precision": 0.7787610619469026, "eval_recall": 0.8073394495412844, "eval_runtime": 3.7118, "eval_samples_per_second": 71.933, "eval_steps_per_second": 9.16, "step": 1200 }, { "epoch": 23.73, "eval_accuracy_score": 0.9888229927007299, "eval_f1": 0.7665198237885462, "eval_loss": 0.08115804940462112, "eval_precision": 0.7372881355932204, "eval_recall": 0.7981651376146789, "eval_runtime": 3.7091, "eval_samples_per_second": 71.984, "eval_steps_per_second": 9.167, "step": 1400 }, { "epoch": 25.42, "learning_rate": 4.530201342281879e-05, "loss": 0.0017, "step": 1500 }, { "epoch": 27.12, "eval_accuracy_score": 0.9890510948905109, "eval_f1": 0.8018433179723502, "eval_loss": 0.09983422607183456, "eval_precision": 0.8055555555555556, "eval_recall": 0.7981651376146789, "eval_runtime": 3.7138, "eval_samples_per_second": 71.894, "eval_steps_per_second": 9.155, "step": 1600 }, { "epoch": 30.51, "eval_accuracy_score": 0.9881386861313869, "eval_f1": 0.7873303167420814, "eval_loss": 0.09764721244573593, "eval_precision": 0.7767857142857143, "eval_recall": 0.7981651376146789, "eval_runtime": 3.7105, "eval_samples_per_second": 71.957, "eval_steps_per_second": 9.163, "step": 1800 }, { "epoch": 33.9, "learning_rate": 4.36241610738255e-05, "loss": 0.0022, "step": 2000 }, { "epoch": 33.9, "eval_accuracy_score": 0.9890510948905109, "eval_f1": 0.8056872037914692, "eval_loss": 0.11199253052473068, "eval_precision": 0.8333333333333334, "eval_recall": 0.7798165137614679, "eval_runtime": 3.7052, "eval_samples_per_second": 72.061, "eval_steps_per_second": 9.176, "step": 2000 }, { "epoch": 37.29, "eval_accuracy_score": 0.9879105839416058, "eval_f1": 0.7982062780269058, "eval_loss": 0.10247013717889786, "eval_precision": 0.7807017543859649, "eval_recall": 0.8165137614678899, "eval_runtime": 3.6997, "eval_samples_per_second": 72.169, "eval_steps_per_second": 9.19, "step": 2200 }, { "epoch": 40.68, "eval_accuracy_score": 0.988594890510949, "eval_f1": 0.7782805429864253, "eval_loss": 0.10762448608875275, "eval_precision": 0.7678571428571429, "eval_recall": 0.7889908256880734, "eval_runtime": 3.7002, "eval_samples_per_second": 72.159, "eval_steps_per_second": 9.189, "step": 2400 }, { "epoch": 42.37, "learning_rate": 4.194630872483222e-05, "loss": 0.0012, "step": 2500 }, { "epoch": 44.07, "eval_accuracy_score": 0.990191605839416, "eval_f1": 0.7999999999999999, "eval_loss": 0.10491974651813507, "eval_precision": 0.8316831683168316, "eval_recall": 0.7706422018348624, "eval_runtime": 3.7018, "eval_samples_per_second": 72.126, "eval_steps_per_second": 9.185, "step": 2600 }, { "epoch": 47.46, "eval_accuracy_score": 0.9888229927007299, "eval_f1": 0.8093023255813954, "eval_loss": 0.10662327706813812, "eval_precision": 0.8207547169811321, "eval_recall": 0.7981651376146789, "eval_runtime": 3.7086, "eval_samples_per_second": 71.995, "eval_steps_per_second": 9.168, "step": 2800 }, { "epoch": 50.85, "learning_rate": 4.026845637583892e-05, "loss": 0.0011, "step": 3000 }, { "epoch": 50.85, "eval_accuracy_score": 0.9881386861313869, "eval_f1": 0.7866108786610879, "eval_loss": 0.11663952469825745, "eval_precision": 0.7230769230769231, "eval_recall": 0.8623853211009175, "eval_runtime": 3.6964, "eval_samples_per_second": 72.232, "eval_steps_per_second": 9.198, "step": 3000 }, { "epoch": 54.24, "eval_accuracy_score": 0.989963503649635, "eval_f1": 0.821917808219178, "eval_loss": 0.11120918393135071, "eval_precision": 0.8181818181818182, "eval_recall": 0.8256880733944955, "eval_runtime": 3.6897, "eval_samples_per_second": 72.364, "eval_steps_per_second": 9.215, "step": 3200 }, { "epoch": 57.63, "eval_accuracy_score": 0.989279197080292, "eval_f1": 0.8195121951219513, "eval_loss": 0.11466681212186813, "eval_precision": 0.875, "eval_recall": 0.7706422018348624, "eval_runtime": 3.701, "eval_samples_per_second": 72.144, "eval_steps_per_second": 9.187, "step": 3400 }, { "epoch": 59.32, "learning_rate": 3.859060402684564e-05, "loss": 0.0011, "step": 3500 }, { "epoch": 61.02, "eval_accuracy_score": 0.988594890510949, "eval_f1": 0.7962085308056872, "eval_loss": 0.10891496390104294, "eval_precision": 0.8235294117647058, "eval_recall": 0.7706422018348624, "eval_runtime": 3.6933, "eval_samples_per_second": 72.293, "eval_steps_per_second": 9.206, "step": 3600 }, { "epoch": 64.41, "eval_accuracy_score": 0.989279197080292, "eval_f1": 0.8018867924528301, "eval_loss": 0.0993962213397026, "eval_precision": 0.8252427184466019, "eval_recall": 0.7798165137614679, "eval_runtime": 3.6945, "eval_samples_per_second": 72.27, "eval_steps_per_second": 9.203, "step": 3800 }, { "epoch": 67.8, "learning_rate": 3.6912751677852356e-05, "loss": 0.0012, "step": 4000 }, { "epoch": 67.8, "eval_accuracy_score": 0.9867700729927007, "eval_f1": 0.7439613526570049, "eval_loss": 0.13062410056591034, "eval_precision": 0.7857142857142857, "eval_recall": 0.7064220183486238, "eval_runtime": 3.689, "eval_samples_per_second": 72.378, "eval_steps_per_second": 9.217, "step": 4000 }, { "epoch": 71.19, "eval_accuracy_score": 0.9883667883211679, "eval_f1": 0.7889908256880734, "eval_loss": 0.10526341944932938, "eval_precision": 0.7889908256880734, "eval_recall": 0.7889908256880734, "eval_runtime": 3.6853, "eval_samples_per_second": 72.451, "eval_steps_per_second": 9.226, "step": 4200 }, { "epoch": 74.58, "eval_accuracy_score": 0.9904197080291971, "eval_f1": 0.8169014084507041, "eval_loss": 0.08558077365159988, "eval_precision": 0.8365384615384616, "eval_recall": 0.7981651376146789, "eval_runtime": 3.6968, "eval_samples_per_second": 72.224, "eval_steps_per_second": 9.197, "step": 4400 }, { "epoch": 76.27, "learning_rate": 3.523489932885906e-05, "loss": 0.001, "step": 4500 }, { "epoch": 77.97, "eval_accuracy_score": 0.9883667883211679, "eval_f1": 0.7733333333333334, "eval_loss": 0.11534098535776138, "eval_precision": 0.75, "eval_recall": 0.7981651376146789, "eval_runtime": 3.7034, "eval_samples_per_second": 72.096, "eval_steps_per_second": 9.181, "step": 4600 }, { "epoch": 81.36, "eval_accuracy_score": 0.9879105839416058, "eval_f1": 0.7762557077625571, "eval_loss": 0.10414384305477142, "eval_precision": 0.7727272727272727, "eval_recall": 0.7798165137614679, "eval_runtime": 3.7039, "eval_samples_per_second": 72.087, "eval_steps_per_second": 9.18, "step": 4800 }, { "epoch": 84.75, "learning_rate": 3.3557046979865775e-05, "loss": 0.0006, "step": 5000 }, { "epoch": 84.75, "eval_accuracy_score": 0.9869981751824818, "eval_f1": 0.7565217391304349, "eval_loss": 0.10606261342763901, "eval_precision": 0.71900826446281, "eval_recall": 0.7981651376146789, "eval_runtime": 3.6946, "eval_samples_per_second": 72.268, "eval_steps_per_second": 9.203, "step": 5000 }, { "epoch": 88.14, "eval_accuracy_score": 0.9883667883211679, "eval_f1": 0.7685185185185185, "eval_loss": 0.10869959741830826, "eval_precision": 0.7757009345794392, "eval_recall": 0.7614678899082569, "eval_runtime": 3.7012, "eval_samples_per_second": 72.139, "eval_steps_per_second": 9.186, "step": 5200 }, { "epoch": 91.53, "eval_accuracy_score": 0.988594890510949, "eval_f1": 0.7813953488372094, "eval_loss": 0.12209474295377731, "eval_precision": 0.7924528301886793, "eval_recall": 0.7706422018348624, "eval_runtime": 3.6806, "eval_samples_per_second": 72.542, "eval_steps_per_second": 9.238, "step": 5400 }, { "epoch": 91.53, "step": 5400, "total_flos": 2.237534270977997e+16, "train_loss": 0.009840657206045256, "train_runtime": 6068.7812, "train_samples_per_second": 79.093, "train_steps_per_second": 2.472 } ], "max_steps": 15000, "num_train_epochs": 255, "total_flos": 2.237534270977997e+16, "trial_name": null, "trial_params": null }