{
  "best_metric": 0.8255746290369508,
  "best_model_checkpoint": "/home/tmnam/Desktop/crosslingual-mining-for-domain-nli/output/pretraining/vihealthbert-w_mlm-ViMedNLI/lr3e-5_wr0.1_wd0.0/checkpoint-5000",
  "epoch": 52.63157894736842,
  "eval_steps": 1000,
  "global_step": 5000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.010526315789473684,
      "grad_norm": 44.032386779785156,
      "learning_rate": 1e-08,
      "loss": 11.5293,
      "step": 1
    },
    {
      "epoch": 10.526315789473685,
      "grad_norm": 5.0217604637146,
      "learning_rate": 9.999999999999999e-06,
      "loss": 5.5327,
      "step": 1000
    },
    {
      "epoch": 10.526315789473685,
      "eval_accuracy": 0.588971337116252,
      "eval_loss": 2.7528159618377686,
      "eval_runtime": 0.9763,
      "eval_samples_per_second": 408.668,
      "eval_steps_per_second": 13.315,
      "step": 1000
    },
    {
      "epoch": 21.05263157894737,
      "grad_norm": 3.756152629852295,
      "learning_rate": 1.9999999999999998e-05,
      "loss": 1.9051,
      "step": 2000
    },
    {
      "epoch": 21.05263157894737,
      "eval_accuracy": 0.7783207463349623,
      "eval_loss": 1.467841625213623,
      "eval_runtime": 0.9735,
      "eval_samples_per_second": 409.849,
      "eval_steps_per_second": 13.353,
      "step": 2000
    },
    {
      "epoch": 31.57894736842105,
      "grad_norm": 2.861485481262207,
      "learning_rate": 3e-05,
      "loss": 1.1194,
      "step": 3000
    },
    {
      "epoch": 31.57894736842105,
      "eval_accuracy": 0.8019903409922435,
      "eval_loss": 1.1543285846710205,
      "eval_runtime": 0.9729,
      "eval_samples_per_second": 410.113,
      "eval_steps_per_second": 13.362,
      "step": 3000
    },
    {
      "epoch": 42.10526315789474,
      "grad_norm": 3.267728805541992,
      "learning_rate": 2.8888888888888888e-05,
      "loss": 0.831,
      "step": 4000
    },
    {
      "epoch": 42.10526315789474,
      "eval_accuracy": 0.8146666666666667,
      "eval_loss": 1.097205638885498,
      "eval_runtime": 0.973,
      "eval_samples_per_second": 410.092,
      "eval_steps_per_second": 13.361,
      "step": 4000
    },
    {
      "epoch": 52.63157894736842,
      "grad_norm": 2.2974698543548584,
      "learning_rate": 2.777777777777778e-05,
      "loss": 0.6805,
      "step": 5000
    },
    {
      "epoch": 52.63157894736842,
      "eval_accuracy": 0.8255746290369508,
      "eval_loss": 0.9968159794807434,
      "eval_runtime": 0.9748,
      "eval_samples_per_second": 409.295,
      "eval_steps_per_second": 13.335,
      "step": 5000
    }
  ],
  "logging_steps": 1000,
  "max_steps": 30000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 316,
  "save_steps": 1000,
  "total_flos": 1.0436462423175168e+16,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": null
}