{
  "best_metric": 0.8255746290369508,
  "best_model_checkpoint": "/home/tmnam/Desktop/crosslingual-mining-for-domain-nli/output/pretraining/vihealthbert-w_mlm-ViMedNLI/lr3e-5_wr0.1_wd0.0/checkpoint-5000",
  "epoch": 105.26315789473684,
  "eval_steps": 1000,
  "global_step": 10000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.010526315789473684,
      "grad_norm": 44.032386779785156,
      "learning_rate": 1e-08,
      "loss": 11.5293,
      "step": 1
    },
    {
      "epoch": 10.526315789473685,
      "grad_norm": 5.0217604637146,
      "learning_rate": 9.999999999999999e-06,
      "loss": 5.5327,
      "step": 1000
    },
    {
      "epoch": 10.526315789473685,
      "eval_accuracy": 0.588971337116252,
      "eval_loss": 2.7528159618377686,
      "eval_runtime": 0.9763,
      "eval_samples_per_second": 408.668,
      "eval_steps_per_second": 13.315,
      "step": 1000
    },
    {
      "epoch": 21.05263157894737,
      "grad_norm": 3.756152629852295,
      "learning_rate": 1.9999999999999998e-05,
      "loss": 1.9051,
      "step": 2000
    },
    {
      "epoch": 21.05263157894737,
      "eval_accuracy": 0.7783207463349623,
      "eval_loss": 1.467841625213623,
      "eval_runtime": 0.9735,
      "eval_samples_per_second": 409.849,
      "eval_steps_per_second": 13.353,
      "step": 2000
    },
    {
      "epoch": 31.57894736842105,
      "grad_norm": 2.861485481262207,
      "learning_rate": 3e-05,
      "loss": 1.1194,
      "step": 3000
    },
    {
      "epoch": 31.57894736842105,
      "eval_accuracy": 0.8019903409922435,
      "eval_loss": 1.1543285846710205,
      "eval_runtime": 0.9729,
      "eval_samples_per_second": 410.113,
      "eval_steps_per_second": 13.362,
      "step": 3000
    },
    {
      "epoch": 42.10526315789474,
      "grad_norm": 3.267728805541992,
      "learning_rate": 2.8888888888888888e-05,
      "loss": 0.831,
      "step": 4000
    },
    {
      "epoch": 42.10526315789474,
      "eval_accuracy": 0.8146666666666667,
      "eval_loss": 1.097205638885498,
      "eval_runtime": 0.973,
      "eval_samples_per_second": 410.092,
      "eval_steps_per_second": 13.361,
      "step": 4000
    },
    {
      "epoch": 52.63157894736842,
      "grad_norm": 2.2974698543548584,
      "learning_rate": 2.777777777777778e-05,
      "loss": 0.6805,
      "step": 5000
    },
    {
      "epoch": 52.63157894736842,
      "eval_accuracy": 0.8255746290369508,
      "eval_loss": 0.9968159794807434,
      "eval_runtime": 0.9748,
      "eval_samples_per_second": 409.295,
      "eval_steps_per_second": 13.335,
      "step": 5000
    },
    {
      "epoch": 63.1578947368421,
      "grad_norm": 2.481663703918457,
      "learning_rate": 2.6666666666666667e-05,
      "loss": 0.5937,
      "step": 6000
    },
    {
      "epoch": 63.1578947368421,
      "eval_accuracy": 0.8242695979172693,
      "eval_loss": 1.0310105085372925,
      "eval_runtime": 0.9734,
      "eval_samples_per_second": 409.886,
      "eval_steps_per_second": 13.355,
      "step": 6000
    },
    {
      "epoch": 73.6842105263158,
      "grad_norm": 2.574080467224121,
      "learning_rate": 2.5555555555555557e-05,
      "loss": 0.5258,
      "step": 7000
    },
    {
      "epoch": 73.6842105263158,
      "eval_accuracy": 0.8151164452064581,
      "eval_loss": 1.1044955253601074,
      "eval_runtime": 0.9728,
      "eval_samples_per_second": 410.166,
      "eval_steps_per_second": 13.364,
      "step": 7000
    },
    {
      "epoch": 84.21052631578948,
      "grad_norm": 2.3108158111572266,
      "learning_rate": 2.4444444444444445e-05,
      "loss": 0.4569,
      "step": 8000
    },
    {
      "epoch": 84.21052631578948,
      "eval_accuracy": 0.8254382152687237,
      "eval_loss": 1.0392757654190063,
      "eval_runtime": 0.9756,
      "eval_samples_per_second": 408.981,
      "eval_steps_per_second": 13.325,
      "step": 8000
    },
    {
      "epoch": 94.73684210526316,
      "grad_norm": 2.3435771465301514,
      "learning_rate": 2.3333333333333336e-05,
      "loss": 0.4007,
      "step": 9000
    },
    {
      "epoch": 94.73684210526316,
      "eval_accuracy": 0.8216863990759458,
      "eval_loss": 1.0684024095535278,
      "eval_runtime": 0.9715,
      "eval_samples_per_second": 410.719,
      "eval_steps_per_second": 13.382,
      "step": 9000
    },
    {
      "epoch": 105.26315789473684,
      "grad_norm": 4.333491325378418,
      "learning_rate": 2.222222222222222e-05,
      "loss": 0.3632,
      "step": 10000
    },
    {
      "epoch": 105.26315789473684,
      "eval_accuracy": 0.8181686046511628,
      "eval_loss": 1.122326374053955,
      "eval_runtime": 0.9742,
      "eval_samples_per_second": 409.576,
      "eval_steps_per_second": 13.345,
      "step": 10000
    }
  ],
  "logging_steps": 1000,
  "max_steps": 30000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 316,
  "save_steps": 1000,
  "total_flos": 2.087108169272832e+16,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": null
}