|
{ |
|
"best_metric": 0.6991622239146992, |
|
"best_model_checkpoint": "logs/ecthr_a/roberta-base/seed_1/checkpoint-1128", |
|
"epoch": 7.0, |
|
"eval_steps": 500, |
|
"global_step": 1974, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.17883636057376862, |
|
"eval_macro-f1": 0.5361152463526995, |
|
"eval_micro-f1": 0.6690590111642744, |
|
"eval_runtime": 8.3244, |
|
"eval_samples_per_second": 120.128, |
|
"eval_steps_per_second": 3.844, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 1.773049645390071, |
|
"grad_norm": 1.5047844648361206, |
|
"learning_rate": 2.7340425531914897e-05, |
|
"loss": 0.1598, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.165725439786911, |
|
"eval_macro-f1": 0.5864998053589437, |
|
"eval_micro-f1": 0.6876456876456877, |
|
"eval_runtime": 8.9929, |
|
"eval_samples_per_second": 111.199, |
|
"eval_steps_per_second": 3.558, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 0.1847357153892517, |
|
"eval_macro-f1": 0.619725405380703, |
|
"eval_micro-f1": 0.6802973977695167, |
|
"eval_runtime": 9.0294, |
|
"eval_samples_per_second": 110.749, |
|
"eval_steps_per_second": 3.544, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 3.546099290780142, |
|
"grad_norm": 1.266696572303772, |
|
"learning_rate": 2.4680851063829786e-05, |
|
"loss": 0.1038, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 0.17048540711402893, |
|
"eval_macro-f1": 0.6382923768808028, |
|
"eval_micro-f1": 0.6991622239146992, |
|
"eval_runtime": 8.9765, |
|
"eval_samples_per_second": 111.402, |
|
"eval_steps_per_second": 3.565, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 0.18128305673599243, |
|
"eval_macro-f1": 0.6483583317279754, |
|
"eval_micro-f1": 0.6948249619482496, |
|
"eval_runtime": 8.4919, |
|
"eval_samples_per_second": 117.76, |
|
"eval_steps_per_second": 3.768, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 5.319148936170213, |
|
"grad_norm": 1.974063754081726, |
|
"learning_rate": 2.2026595744680854e-05, |
|
"loss": 0.0835, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 0.1945626437664032, |
|
"eval_macro-f1": 0.6427139982243243, |
|
"eval_micro-f1": 0.6928838951310862, |
|
"eval_runtime": 8.3849, |
|
"eval_samples_per_second": 119.262, |
|
"eval_steps_per_second": 3.816, |
|
"step": 1692 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 0.20862846076488495, |
|
"eval_macro-f1": 0.6248697036429669, |
|
"eval_micro-f1": 0.6922798115259152, |
|
"eval_runtime": 8.6109, |
|
"eval_samples_per_second": 116.132, |
|
"eval_steps_per_second": 3.716, |
|
"step": 1974 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"step": 1974, |
|
"total_flos": 3.099603884499272e+17, |
|
"train_loss": 0.1036064192396046, |
|
"train_runtime": 1162.1568, |
|
"train_samples_per_second": 154.884, |
|
"train_steps_per_second": 4.853 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 5640, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.099603884499272e+17, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|