|
{ |
|
"best_metric": 0.6941368579864502, |
|
"best_model_checkpoint": "hBERTv1_new_pretrain_w_init_48_ver2_sst2/checkpoint-2106", |
|
"epoch": 7.0, |
|
"eval_steps": 500, |
|
"global_step": 7371, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.733333333333334e-05, |
|
"loss": 0.6948, |
|
"step": 1053 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 0.694778561592102, |
|
"eval_runtime": 4.5684, |
|
"eval_samples_per_second": 190.875, |
|
"eval_steps_per_second": 3.065, |
|
"step": 1053 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 3.466666666666667e-05, |
|
"loss": 0.6927, |
|
"step": 2106 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 0.6941368579864502, |
|
"eval_runtime": 4.5828, |
|
"eval_samples_per_second": 190.278, |
|
"eval_steps_per_second": 3.055, |
|
"step": 2106 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 3.2000000000000005e-05, |
|
"loss": 0.6879, |
|
"step": 3159 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 0.7004609704017639, |
|
"eval_runtime": 4.5708, |
|
"eval_samples_per_second": 190.777, |
|
"eval_steps_per_second": 3.063, |
|
"step": 3159 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 2.9333333333333333e-05, |
|
"loss": 0.6873, |
|
"step": 4212 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 0.7004290819168091, |
|
"eval_runtime": 4.5617, |
|
"eval_samples_per_second": 191.157, |
|
"eval_steps_per_second": 3.069, |
|
"step": 4212 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 2.6666666666666667e-05, |
|
"loss": 0.6887, |
|
"step": 5265 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 0.7151248455047607, |
|
"eval_runtime": 4.571, |
|
"eval_samples_per_second": 190.768, |
|
"eval_steps_per_second": 3.063, |
|
"step": 5265 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 2.4e-05, |
|
"loss": 0.6871, |
|
"step": 6318 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 0.6974788308143616, |
|
"eval_runtime": 4.576, |
|
"eval_samples_per_second": 190.561, |
|
"eval_steps_per_second": 3.059, |
|
"step": 6318 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 2.1333333333333335e-05, |
|
"loss": 0.6859, |
|
"step": 7371 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 0.7067839503288269, |
|
"eval_runtime": 4.581, |
|
"eval_samples_per_second": 190.351, |
|
"eval_steps_per_second": 3.056, |
|
"step": 7371 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"step": 7371, |
|
"total_flos": 6.981918485852979e+16, |
|
"train_loss": 0.6891914381131503, |
|
"train_runtime": 7431.7212, |
|
"train_samples_per_second": 135.936, |
|
"train_steps_per_second": 2.125 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 15795, |
|
"num_train_epochs": 15, |
|
"save_steps": 500, |
|
"total_flos": 6.981918485852979e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|