roberta-base-downstream-build_rr / trainer_state.json
MHGanainy's picture
End of training
02c1e74 verified
raw
history blame
8.75 kB
{
"best_metric": 0.7829107328933658,
"best_model_checkpoint": "logs/indian_build_rr/roberta-base/seed_1/checkpoint-682",
"epoch": 14.0,
"eval_steps": 500,
"global_step": 868,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_accuracy": 0.6509204584925321,
"eval_loss": 1.1796680688858032,
"eval_macro-f1": 0.2406374552281572,
"eval_micro-f1": 0.6509204584925321,
"eval_precision-macro": 0.36507985738726567,
"eval_precision-micro": 0.6509204584925321,
"eval_recall-macro": 0.24249037703341708,
"eval_recall-micro": 0.6509204584925321,
"eval_runtime": 1.5286,
"eval_samples_per_second": 19.625,
"eval_steps_per_second": 5.233,
"step": 62
},
{
"epoch": 2.0,
"eval_accuracy": 0.7349774227162209,
"eval_loss": 0.8353763222694397,
"eval_macro-f1": 0.5254710956125371,
"eval_micro-f1": 0.7349774227162209,
"eval_precision-macro": 0.5349847227650002,
"eval_precision-micro": 0.7349774227162209,
"eval_recall-macro": 0.5291052629047126,
"eval_recall-micro": 0.7349774227162209,
"eval_runtime": 1.6436,
"eval_samples_per_second": 18.252,
"eval_steps_per_second": 4.867,
"step": 124
},
{
"epoch": 3.0,
"eval_accuracy": 0.7342827370614797,
"eval_loss": 0.8058456182479858,
"eval_macro-f1": 0.536617185045311,
"eval_micro-f1": 0.7342827370614797,
"eval_precision-macro": 0.5558680331273026,
"eval_precision-micro": 0.7342827370614797,
"eval_recall-macro": 0.5381948279596481,
"eval_recall-micro": 0.7342827370614797,
"eval_runtime": 1.5882,
"eval_samples_per_second": 18.889,
"eval_steps_per_second": 5.037,
"step": 186
},
{
"epoch": 4.0,
"eval_accuracy": 0.7502605071205279,
"eval_loss": 0.7717716097831726,
"eval_macro-f1": 0.530010703703156,
"eval_micro-f1": 0.7502605071205279,
"eval_precision-macro": 0.6245985387634561,
"eval_precision-micro": 0.7502605071205279,
"eval_recall-macro": 0.5200955397553431,
"eval_recall-micro": 0.7502605071205279,
"eval_runtime": 1.601,
"eval_samples_per_second": 18.738,
"eval_steps_per_second": 4.997,
"step": 248
},
{
"epoch": 5.0,
"eval_accuracy": 0.7641542202153525,
"eval_loss": 0.7306948900222778,
"eval_macro-f1": 0.5578515609115684,
"eval_micro-f1": 0.7641542202153525,
"eval_precision-macro": 0.5889876526435496,
"eval_precision-micro": 0.7641542202153525,
"eval_recall-macro": 0.5462553107739512,
"eval_recall-micro": 0.7641542202153525,
"eval_runtime": 2.1379,
"eval_samples_per_second": 14.032,
"eval_steps_per_second": 3.742,
"step": 310
},
{
"epoch": 6.0,
"eval_accuracy": 0.774574505036471,
"eval_loss": 0.7098783254623413,
"eval_macro-f1": 0.5481274858908393,
"eval_micro-f1": 0.774574505036471,
"eval_precision-macro": 0.6076438792386994,
"eval_precision-micro": 0.774574505036471,
"eval_recall-macro": 0.5431283891127849,
"eval_recall-micro": 0.774574505036471,
"eval_runtime": 2.1932,
"eval_samples_per_second": 13.679,
"eval_steps_per_second": 3.648,
"step": 372
},
{
"epoch": 7.0,
"eval_accuracy": 0.7811740187565127,
"eval_loss": 0.7071970701217651,
"eval_macro-f1": 0.5261426411307122,
"eval_micro-f1": 0.7811740187565127,
"eval_precision-macro": 0.6089513985670642,
"eval_precision-micro": 0.7811740187565127,
"eval_recall-macro": 0.5125569147899907,
"eval_recall-micro": 0.7811740187565127,
"eval_runtime": 1.6371,
"eval_samples_per_second": 18.326,
"eval_steps_per_second": 4.887,
"step": 434
},
{
"epoch": 8.0,
"eval_accuracy": 0.7825633900659952,
"eval_loss": 0.6919089555740356,
"eval_macro-f1": 0.5675875408188613,
"eval_micro-f1": 0.7825633900659952,
"eval_precision-macro": 0.6321251715307294,
"eval_precision-micro": 0.7825633900659952,
"eval_recall-macro": 0.5470775441802167,
"eval_recall-micro": 0.7825633900659952,
"eval_runtime": 1.6786,
"eval_samples_per_second": 17.872,
"eval_steps_per_second": 4.766,
"step": 496
},
{
"epoch": 8.064516129032258,
"grad_norm": 7.422909736633301,
"learning_rate": 1.7951612903225806e-05,
"loss": 0.8758,
"step": 500
},
{
"epoch": 9.0,
"eval_accuracy": 0.7735324765543592,
"eval_loss": 0.7503196597099304,
"eval_macro-f1": 0.569622502930968,
"eval_micro-f1": 0.7735324765543592,
"eval_precision-macro": 0.5665598749803964,
"eval_precision-micro": 0.7735324765543592,
"eval_recall-macro": 0.5818475586367124,
"eval_recall-micro": 0.7735324765543592,
"eval_runtime": 1.5468,
"eval_samples_per_second": 19.395,
"eval_steps_per_second": 5.172,
"step": 558
},
{
"epoch": 10.0,
"eval_accuracy": 0.7783952761375478,
"eval_loss": 0.7511970400810242,
"eval_macro-f1": 0.5755339015228546,
"eval_micro-f1": 0.7783952761375478,
"eval_precision-macro": 0.6053985952851118,
"eval_precision-micro": 0.7783952761375478,
"eval_recall-macro": 0.5655629578179421,
"eval_recall-micro": 0.7783952761375478,
"eval_runtime": 2.2548,
"eval_samples_per_second": 13.305,
"eval_steps_per_second": 3.548,
"step": 620
},
{
"epoch": 11.0,
"eval_accuracy": 0.7829107328933658,
"eval_loss": 0.7655877470970154,
"eval_macro-f1": 0.591328685794211,
"eval_micro-f1": 0.7829107328933658,
"eval_precision-macro": 0.6085589543807931,
"eval_precision-micro": 0.7829107328933658,
"eval_recall-macro": 0.5834711775606751,
"eval_recall-micro": 0.7829107328933658,
"eval_runtime": 2.2566,
"eval_samples_per_second": 13.295,
"eval_steps_per_second": 3.545,
"step": 682
},
{
"epoch": 12.0,
"eval_accuracy": 0.7738798193817298,
"eval_loss": 0.786118745803833,
"eval_macro-f1": 0.5843444583481733,
"eval_micro-f1": 0.7738798193817298,
"eval_precision-macro": 0.5971774078353586,
"eval_precision-micro": 0.7738798193817298,
"eval_recall-macro": 0.5885123710730829,
"eval_recall-micro": 0.7738798193817298,
"eval_runtime": 1.5545,
"eval_samples_per_second": 19.299,
"eval_steps_per_second": 5.146,
"step": 744
},
{
"epoch": 13.0,
"eval_accuracy": 0.7780479333101772,
"eval_loss": 0.8238919377326965,
"eval_macro-f1": 0.5701476938599402,
"eval_micro-f1": 0.7780479333101772,
"eval_precision-macro": 0.5975031172688886,
"eval_precision-micro": 0.7780479333101772,
"eval_recall-macro": 0.5748658781079373,
"eval_recall-micro": 0.7780479333101772,
"eval_runtime": 1.5795,
"eval_samples_per_second": 18.993,
"eval_steps_per_second": 5.065,
"step": 806
},
{
"epoch": 14.0,
"eval_accuracy": 0.7797846474470302,
"eval_loss": 0.8271887302398682,
"eval_macro-f1": 0.592619371017184,
"eval_micro-f1": 0.7797846474470302,
"eval_precision-macro": 0.6088825353073621,
"eval_precision-micro": 0.7797846474470302,
"eval_recall-macro": 0.5868004304340952,
"eval_recall-micro": 0.7797846474470302,
"eval_runtime": 2.326,
"eval_samples_per_second": 12.898,
"eval_steps_per_second": 3.439,
"step": 868
},
{
"epoch": 14.0,
"step": 868,
"total_flos": 6.747257278287053e+16,
"train_loss": 0.6624447870913739,
"train_runtime": 488.5119,
"train_samples_per_second": 10.112,
"train_steps_per_second": 2.538
}
],
"logging_steps": 500,
"max_steps": 1240,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 3,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 6.747257278287053e+16,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}