|
{ |
|
"best_metric": 0.7829107328933658, |
|
"best_model_checkpoint": "logs/indian_build_rr/roberta-base/seed_1/checkpoint-682", |
|
"epoch": 14.0, |
|
"eval_steps": 500, |
|
"global_step": 868, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.6509204584925321, |
|
"eval_loss": 1.1796680688858032, |
|
"eval_macro-f1": 0.2406374552281572, |
|
"eval_micro-f1": 0.6509204584925321, |
|
"eval_precision-macro": 0.36507985738726567, |
|
"eval_precision-micro": 0.6509204584925321, |
|
"eval_recall-macro": 0.24249037703341708, |
|
"eval_recall-micro": 0.6509204584925321, |
|
"eval_runtime": 1.5286, |
|
"eval_samples_per_second": 19.625, |
|
"eval_steps_per_second": 5.233, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7349774227162209, |
|
"eval_loss": 0.8353763222694397, |
|
"eval_macro-f1": 0.5254710956125371, |
|
"eval_micro-f1": 0.7349774227162209, |
|
"eval_precision-macro": 0.5349847227650002, |
|
"eval_precision-micro": 0.7349774227162209, |
|
"eval_recall-macro": 0.5291052629047126, |
|
"eval_recall-micro": 0.7349774227162209, |
|
"eval_runtime": 1.6436, |
|
"eval_samples_per_second": 18.252, |
|
"eval_steps_per_second": 4.867, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7342827370614797, |
|
"eval_loss": 0.8058456182479858, |
|
"eval_macro-f1": 0.536617185045311, |
|
"eval_micro-f1": 0.7342827370614797, |
|
"eval_precision-macro": 0.5558680331273026, |
|
"eval_precision-micro": 0.7342827370614797, |
|
"eval_recall-macro": 0.5381948279596481, |
|
"eval_recall-micro": 0.7342827370614797, |
|
"eval_runtime": 1.5882, |
|
"eval_samples_per_second": 18.889, |
|
"eval_steps_per_second": 5.037, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7502605071205279, |
|
"eval_loss": 0.7717716097831726, |
|
"eval_macro-f1": 0.530010703703156, |
|
"eval_micro-f1": 0.7502605071205279, |
|
"eval_precision-macro": 0.6245985387634561, |
|
"eval_precision-micro": 0.7502605071205279, |
|
"eval_recall-macro": 0.5200955397553431, |
|
"eval_recall-micro": 0.7502605071205279, |
|
"eval_runtime": 1.601, |
|
"eval_samples_per_second": 18.738, |
|
"eval_steps_per_second": 4.997, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.7641542202153525, |
|
"eval_loss": 0.7306948900222778, |
|
"eval_macro-f1": 0.5578515609115684, |
|
"eval_micro-f1": 0.7641542202153525, |
|
"eval_precision-macro": 0.5889876526435496, |
|
"eval_precision-micro": 0.7641542202153525, |
|
"eval_recall-macro": 0.5462553107739512, |
|
"eval_recall-micro": 0.7641542202153525, |
|
"eval_runtime": 2.1379, |
|
"eval_samples_per_second": 14.032, |
|
"eval_steps_per_second": 3.742, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.774574505036471, |
|
"eval_loss": 0.7098783254623413, |
|
"eval_macro-f1": 0.5481274858908393, |
|
"eval_micro-f1": 0.774574505036471, |
|
"eval_precision-macro": 0.6076438792386994, |
|
"eval_precision-micro": 0.774574505036471, |
|
"eval_recall-macro": 0.5431283891127849, |
|
"eval_recall-micro": 0.774574505036471, |
|
"eval_runtime": 2.1932, |
|
"eval_samples_per_second": 13.679, |
|
"eval_steps_per_second": 3.648, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.7811740187565127, |
|
"eval_loss": 0.7071970701217651, |
|
"eval_macro-f1": 0.5261426411307122, |
|
"eval_micro-f1": 0.7811740187565127, |
|
"eval_precision-macro": 0.6089513985670642, |
|
"eval_precision-micro": 0.7811740187565127, |
|
"eval_recall-macro": 0.5125569147899907, |
|
"eval_recall-micro": 0.7811740187565127, |
|
"eval_runtime": 1.6371, |
|
"eval_samples_per_second": 18.326, |
|
"eval_steps_per_second": 4.887, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.7825633900659952, |
|
"eval_loss": 0.6919089555740356, |
|
"eval_macro-f1": 0.5675875408188613, |
|
"eval_micro-f1": 0.7825633900659952, |
|
"eval_precision-macro": 0.6321251715307294, |
|
"eval_precision-micro": 0.7825633900659952, |
|
"eval_recall-macro": 0.5470775441802167, |
|
"eval_recall-micro": 0.7825633900659952, |
|
"eval_runtime": 1.6786, |
|
"eval_samples_per_second": 17.872, |
|
"eval_steps_per_second": 4.766, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 8.064516129032258, |
|
"grad_norm": 7.422909736633301, |
|
"learning_rate": 1.7951612903225806e-05, |
|
"loss": 0.8758, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.7735324765543592, |
|
"eval_loss": 0.7503196597099304, |
|
"eval_macro-f1": 0.569622502930968, |
|
"eval_micro-f1": 0.7735324765543592, |
|
"eval_precision-macro": 0.5665598749803964, |
|
"eval_precision-micro": 0.7735324765543592, |
|
"eval_recall-macro": 0.5818475586367124, |
|
"eval_recall-micro": 0.7735324765543592, |
|
"eval_runtime": 1.5468, |
|
"eval_samples_per_second": 19.395, |
|
"eval_steps_per_second": 5.172, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.7783952761375478, |
|
"eval_loss": 0.7511970400810242, |
|
"eval_macro-f1": 0.5755339015228546, |
|
"eval_micro-f1": 0.7783952761375478, |
|
"eval_precision-macro": 0.6053985952851118, |
|
"eval_precision-micro": 0.7783952761375478, |
|
"eval_recall-macro": 0.5655629578179421, |
|
"eval_recall-micro": 0.7783952761375478, |
|
"eval_runtime": 2.2548, |
|
"eval_samples_per_second": 13.305, |
|
"eval_steps_per_second": 3.548, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.7829107328933658, |
|
"eval_loss": 0.7655877470970154, |
|
"eval_macro-f1": 0.591328685794211, |
|
"eval_micro-f1": 0.7829107328933658, |
|
"eval_precision-macro": 0.6085589543807931, |
|
"eval_precision-micro": 0.7829107328933658, |
|
"eval_recall-macro": 0.5834711775606751, |
|
"eval_recall-micro": 0.7829107328933658, |
|
"eval_runtime": 2.2566, |
|
"eval_samples_per_second": 13.295, |
|
"eval_steps_per_second": 3.545, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.7738798193817298, |
|
"eval_loss": 0.786118745803833, |
|
"eval_macro-f1": 0.5843444583481733, |
|
"eval_micro-f1": 0.7738798193817298, |
|
"eval_precision-macro": 0.5971774078353586, |
|
"eval_precision-micro": 0.7738798193817298, |
|
"eval_recall-macro": 0.5885123710730829, |
|
"eval_recall-micro": 0.7738798193817298, |
|
"eval_runtime": 1.5545, |
|
"eval_samples_per_second": 19.299, |
|
"eval_steps_per_second": 5.146, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.7780479333101772, |
|
"eval_loss": 0.8238919377326965, |
|
"eval_macro-f1": 0.5701476938599402, |
|
"eval_micro-f1": 0.7780479333101772, |
|
"eval_precision-macro": 0.5975031172688886, |
|
"eval_precision-micro": 0.7780479333101772, |
|
"eval_recall-macro": 0.5748658781079373, |
|
"eval_recall-micro": 0.7780479333101772, |
|
"eval_runtime": 1.5795, |
|
"eval_samples_per_second": 18.993, |
|
"eval_steps_per_second": 5.065, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.7797846474470302, |
|
"eval_loss": 0.8271887302398682, |
|
"eval_macro-f1": 0.592619371017184, |
|
"eval_micro-f1": 0.7797846474470302, |
|
"eval_precision-macro": 0.6088825353073621, |
|
"eval_precision-micro": 0.7797846474470302, |
|
"eval_recall-macro": 0.5868004304340952, |
|
"eval_recall-micro": 0.7797846474470302, |
|
"eval_runtime": 2.326, |
|
"eval_samples_per_second": 12.898, |
|
"eval_steps_per_second": 3.439, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"step": 868, |
|
"total_flos": 6.747257278287053e+16, |
|
"train_loss": 0.6624447870913739, |
|
"train_runtime": 488.5119, |
|
"train_samples_per_second": 10.112, |
|
"train_steps_per_second": 2.538 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 1240, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 6.747257278287053e+16, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|