|
{ |
|
"best_metric": 0.8056872037914692, |
|
"best_model_checkpoint": "/scratch/mrahma45/pixel/finetuned_models/mbert/mbert-base-finetuned-masakhaner-wol/checkpoint-2000", |
|
"epoch": 91.52542372881356, |
|
"global_step": 5400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 3.39, |
|
"eval_accuracy_score": 0.9890510948905109, |
|
"eval_f1": 0.7586206896551724, |
|
"eval_loss": 0.06563546508550644, |
|
"eval_precision": 0.8191489361702128, |
|
"eval_recall": 0.7064220183486238, |
|
"eval_runtime": 3.7154, |
|
"eval_samples_per_second": 71.864, |
|
"eval_steps_per_second": 9.151, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 6.78, |
|
"eval_accuracy_score": 0.989507299270073, |
|
"eval_f1": 0.8076923076923077, |
|
"eval_loss": 0.07390450686216354, |
|
"eval_precision": 0.8484848484848485, |
|
"eval_recall": 0.7706422018348624, |
|
"eval_runtime": 3.7144, |
|
"eval_samples_per_second": 71.883, |
|
"eval_steps_per_second": 9.154, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 8.47, |
|
"learning_rate": 4.865771812080537e-05, |
|
"loss": 0.0928, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 10.17, |
|
"eval_accuracy_score": 0.989507299270073, |
|
"eval_f1": 0.7926267281105991, |
|
"eval_loss": 0.0707671046257019, |
|
"eval_precision": 0.7962962962962963, |
|
"eval_recall": 0.7889908256880734, |
|
"eval_runtime": 3.7124, |
|
"eval_samples_per_second": 71.921, |
|
"eval_steps_per_second": 9.159, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 13.56, |
|
"eval_accuracy_score": 0.9890510948905109, |
|
"eval_f1": 0.7924528301886793, |
|
"eval_loss": 0.08138999342918396, |
|
"eval_precision": 0.8155339805825242, |
|
"eval_recall": 0.7706422018348624, |
|
"eval_runtime": 3.7153, |
|
"eval_samples_per_second": 71.864, |
|
"eval_steps_per_second": 9.151, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 16.95, |
|
"learning_rate": 4.697986577181208e-05, |
|
"loss": 0.0031, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 16.95, |
|
"eval_accuracy_score": 0.9879105839416058, |
|
"eval_f1": 0.7555555555555554, |
|
"eval_loss": 0.09191697090864182, |
|
"eval_precision": 0.7327586206896551, |
|
"eval_recall": 0.7798165137614679, |
|
"eval_runtime": 3.7121, |
|
"eval_samples_per_second": 71.926, |
|
"eval_steps_per_second": 9.159, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 20.34, |
|
"eval_accuracy_score": 0.9890510948905109, |
|
"eval_f1": 0.7927927927927928, |
|
"eval_loss": 0.08485409617424011, |
|
"eval_precision": 0.7787610619469026, |
|
"eval_recall": 0.8073394495412844, |
|
"eval_runtime": 3.7118, |
|
"eval_samples_per_second": 71.933, |
|
"eval_steps_per_second": 9.16, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 23.73, |
|
"eval_accuracy_score": 0.9888229927007299, |
|
"eval_f1": 0.7665198237885462, |
|
"eval_loss": 0.08115804940462112, |
|
"eval_precision": 0.7372881355932204, |
|
"eval_recall": 0.7981651376146789, |
|
"eval_runtime": 3.7091, |
|
"eval_samples_per_second": 71.984, |
|
"eval_steps_per_second": 9.167, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 25.42, |
|
"learning_rate": 4.530201342281879e-05, |
|
"loss": 0.0017, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 27.12, |
|
"eval_accuracy_score": 0.9890510948905109, |
|
"eval_f1": 0.8018433179723502, |
|
"eval_loss": 0.09983422607183456, |
|
"eval_precision": 0.8055555555555556, |
|
"eval_recall": 0.7981651376146789, |
|
"eval_runtime": 3.7138, |
|
"eval_samples_per_second": 71.894, |
|
"eval_steps_per_second": 9.155, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 30.51, |
|
"eval_accuracy_score": 0.9881386861313869, |
|
"eval_f1": 0.7873303167420814, |
|
"eval_loss": 0.09764721244573593, |
|
"eval_precision": 0.7767857142857143, |
|
"eval_recall": 0.7981651376146789, |
|
"eval_runtime": 3.7105, |
|
"eval_samples_per_second": 71.957, |
|
"eval_steps_per_second": 9.163, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 33.9, |
|
"learning_rate": 4.36241610738255e-05, |
|
"loss": 0.0022, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 33.9, |
|
"eval_accuracy_score": 0.9890510948905109, |
|
"eval_f1": 0.8056872037914692, |
|
"eval_loss": 0.11199253052473068, |
|
"eval_precision": 0.8333333333333334, |
|
"eval_recall": 0.7798165137614679, |
|
"eval_runtime": 3.7052, |
|
"eval_samples_per_second": 72.061, |
|
"eval_steps_per_second": 9.176, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 37.29, |
|
"eval_accuracy_score": 0.9879105839416058, |
|
"eval_f1": 0.7982062780269058, |
|
"eval_loss": 0.10247013717889786, |
|
"eval_precision": 0.7807017543859649, |
|
"eval_recall": 0.8165137614678899, |
|
"eval_runtime": 3.6997, |
|
"eval_samples_per_second": 72.169, |
|
"eval_steps_per_second": 9.19, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 40.68, |
|
"eval_accuracy_score": 0.988594890510949, |
|
"eval_f1": 0.7782805429864253, |
|
"eval_loss": 0.10762448608875275, |
|
"eval_precision": 0.7678571428571429, |
|
"eval_recall": 0.7889908256880734, |
|
"eval_runtime": 3.7002, |
|
"eval_samples_per_second": 72.159, |
|
"eval_steps_per_second": 9.189, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 42.37, |
|
"learning_rate": 4.194630872483222e-05, |
|
"loss": 0.0012, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 44.07, |
|
"eval_accuracy_score": 0.990191605839416, |
|
"eval_f1": 0.7999999999999999, |
|
"eval_loss": 0.10491974651813507, |
|
"eval_precision": 0.8316831683168316, |
|
"eval_recall": 0.7706422018348624, |
|
"eval_runtime": 3.7018, |
|
"eval_samples_per_second": 72.126, |
|
"eval_steps_per_second": 9.185, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 47.46, |
|
"eval_accuracy_score": 0.9888229927007299, |
|
"eval_f1": 0.8093023255813954, |
|
"eval_loss": 0.10662327706813812, |
|
"eval_precision": 0.8207547169811321, |
|
"eval_recall": 0.7981651376146789, |
|
"eval_runtime": 3.7086, |
|
"eval_samples_per_second": 71.995, |
|
"eval_steps_per_second": 9.168, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 50.85, |
|
"learning_rate": 4.026845637583892e-05, |
|
"loss": 0.0011, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 50.85, |
|
"eval_accuracy_score": 0.9881386861313869, |
|
"eval_f1": 0.7866108786610879, |
|
"eval_loss": 0.11663952469825745, |
|
"eval_precision": 0.7230769230769231, |
|
"eval_recall": 0.8623853211009175, |
|
"eval_runtime": 3.6964, |
|
"eval_samples_per_second": 72.232, |
|
"eval_steps_per_second": 9.198, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 54.24, |
|
"eval_accuracy_score": 0.989963503649635, |
|
"eval_f1": 0.821917808219178, |
|
"eval_loss": 0.11120918393135071, |
|
"eval_precision": 0.8181818181818182, |
|
"eval_recall": 0.8256880733944955, |
|
"eval_runtime": 3.6897, |
|
"eval_samples_per_second": 72.364, |
|
"eval_steps_per_second": 9.215, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 57.63, |
|
"eval_accuracy_score": 0.989279197080292, |
|
"eval_f1": 0.8195121951219513, |
|
"eval_loss": 0.11466681212186813, |
|
"eval_precision": 0.875, |
|
"eval_recall": 0.7706422018348624, |
|
"eval_runtime": 3.701, |
|
"eval_samples_per_second": 72.144, |
|
"eval_steps_per_second": 9.187, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 59.32, |
|
"learning_rate": 3.859060402684564e-05, |
|
"loss": 0.0011, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 61.02, |
|
"eval_accuracy_score": 0.988594890510949, |
|
"eval_f1": 0.7962085308056872, |
|
"eval_loss": 0.10891496390104294, |
|
"eval_precision": 0.8235294117647058, |
|
"eval_recall": 0.7706422018348624, |
|
"eval_runtime": 3.6933, |
|
"eval_samples_per_second": 72.293, |
|
"eval_steps_per_second": 9.206, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 64.41, |
|
"eval_accuracy_score": 0.989279197080292, |
|
"eval_f1": 0.8018867924528301, |
|
"eval_loss": 0.0993962213397026, |
|
"eval_precision": 0.8252427184466019, |
|
"eval_recall": 0.7798165137614679, |
|
"eval_runtime": 3.6945, |
|
"eval_samples_per_second": 72.27, |
|
"eval_steps_per_second": 9.203, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 67.8, |
|
"learning_rate": 3.6912751677852356e-05, |
|
"loss": 0.0012, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 67.8, |
|
"eval_accuracy_score": 0.9867700729927007, |
|
"eval_f1": 0.7439613526570049, |
|
"eval_loss": 0.13062410056591034, |
|
"eval_precision": 0.7857142857142857, |
|
"eval_recall": 0.7064220183486238, |
|
"eval_runtime": 3.689, |
|
"eval_samples_per_second": 72.378, |
|
"eval_steps_per_second": 9.217, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 71.19, |
|
"eval_accuracy_score": 0.9883667883211679, |
|
"eval_f1": 0.7889908256880734, |
|
"eval_loss": 0.10526341944932938, |
|
"eval_precision": 0.7889908256880734, |
|
"eval_recall": 0.7889908256880734, |
|
"eval_runtime": 3.6853, |
|
"eval_samples_per_second": 72.451, |
|
"eval_steps_per_second": 9.226, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 74.58, |
|
"eval_accuracy_score": 0.9904197080291971, |
|
"eval_f1": 0.8169014084507041, |
|
"eval_loss": 0.08558077365159988, |
|
"eval_precision": 0.8365384615384616, |
|
"eval_recall": 0.7981651376146789, |
|
"eval_runtime": 3.6968, |
|
"eval_samples_per_second": 72.224, |
|
"eval_steps_per_second": 9.197, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 76.27, |
|
"learning_rate": 3.523489932885906e-05, |
|
"loss": 0.001, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 77.97, |
|
"eval_accuracy_score": 0.9883667883211679, |
|
"eval_f1": 0.7733333333333334, |
|
"eval_loss": 0.11534098535776138, |
|
"eval_precision": 0.75, |
|
"eval_recall": 0.7981651376146789, |
|
"eval_runtime": 3.7034, |
|
"eval_samples_per_second": 72.096, |
|
"eval_steps_per_second": 9.181, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 81.36, |
|
"eval_accuracy_score": 0.9879105839416058, |
|
"eval_f1": 0.7762557077625571, |
|
"eval_loss": 0.10414384305477142, |
|
"eval_precision": 0.7727272727272727, |
|
"eval_recall": 0.7798165137614679, |
|
"eval_runtime": 3.7039, |
|
"eval_samples_per_second": 72.087, |
|
"eval_steps_per_second": 9.18, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 84.75, |
|
"learning_rate": 3.3557046979865775e-05, |
|
"loss": 0.0006, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 84.75, |
|
"eval_accuracy_score": 0.9869981751824818, |
|
"eval_f1": 0.7565217391304349, |
|
"eval_loss": 0.10606261342763901, |
|
"eval_precision": 0.71900826446281, |
|
"eval_recall": 0.7981651376146789, |
|
"eval_runtime": 3.6946, |
|
"eval_samples_per_second": 72.268, |
|
"eval_steps_per_second": 9.203, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 88.14, |
|
"eval_accuracy_score": 0.9883667883211679, |
|
"eval_f1": 0.7685185185185185, |
|
"eval_loss": 0.10869959741830826, |
|
"eval_precision": 0.7757009345794392, |
|
"eval_recall": 0.7614678899082569, |
|
"eval_runtime": 3.7012, |
|
"eval_samples_per_second": 72.139, |
|
"eval_steps_per_second": 9.186, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 91.53, |
|
"eval_accuracy_score": 0.988594890510949, |
|
"eval_f1": 0.7813953488372094, |
|
"eval_loss": 0.12209474295377731, |
|
"eval_precision": 0.7924528301886793, |
|
"eval_recall": 0.7706422018348624, |
|
"eval_runtime": 3.6806, |
|
"eval_samples_per_second": 72.542, |
|
"eval_steps_per_second": 9.238, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 91.53, |
|
"step": 5400, |
|
"total_flos": 2.237534270977997e+16, |
|
"train_loss": 0.009840657206045256, |
|
"train_runtime": 6068.7812, |
|
"train_samples_per_second": 79.093, |
|
"train_steps_per_second": 2.472 |
|
} |
|
], |
|
"max_steps": 15000, |
|
"num_train_epochs": 255, |
|
"total_flos": 2.237534270977997e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|