|
{ |
|
"best_metric": 0.8348446558644269, |
|
"best_model_checkpoint": "/home/tmnam/Desktop/crosslingual-mining-for-domain-nli/output/pretraining/vihealthbert-w_mlm-ViMedNLI/lr3e-5_wr0.1_wd0.0/checkpoint-19000", |
|
"epoch": 315.7894736842105, |
|
"eval_steps": 1000, |
|
"global_step": 30000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.010526315789473684, |
|
"grad_norm": 44.032386779785156, |
|
"learning_rate": 1e-08, |
|
"loss": 11.5293, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 10.526315789473685, |
|
"grad_norm": 5.0217604637146, |
|
"learning_rate": 9.999999999999999e-06, |
|
"loss": 5.5327, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 10.526315789473685, |
|
"eval_accuracy": 0.588971337116252, |
|
"eval_loss": 2.7528159618377686, |
|
"eval_runtime": 0.9763, |
|
"eval_samples_per_second": 408.668, |
|
"eval_steps_per_second": 13.315, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 21.05263157894737, |
|
"grad_norm": 3.756152629852295, |
|
"learning_rate": 1.9999999999999998e-05, |
|
"loss": 1.9051, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 21.05263157894737, |
|
"eval_accuracy": 0.7783207463349623, |
|
"eval_loss": 1.467841625213623, |
|
"eval_runtime": 0.9735, |
|
"eval_samples_per_second": 409.849, |
|
"eval_steps_per_second": 13.353, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 31.57894736842105, |
|
"grad_norm": 2.861485481262207, |
|
"learning_rate": 3e-05, |
|
"loss": 1.1194, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 31.57894736842105, |
|
"eval_accuracy": 0.8019903409922435, |
|
"eval_loss": 1.1543285846710205, |
|
"eval_runtime": 0.9729, |
|
"eval_samples_per_second": 410.113, |
|
"eval_steps_per_second": 13.362, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 42.10526315789474, |
|
"grad_norm": 3.267728805541992, |
|
"learning_rate": 2.8888888888888888e-05, |
|
"loss": 0.831, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 42.10526315789474, |
|
"eval_accuracy": 0.8146666666666667, |
|
"eval_loss": 1.097205638885498, |
|
"eval_runtime": 0.973, |
|
"eval_samples_per_second": 410.092, |
|
"eval_steps_per_second": 13.361, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 52.63157894736842, |
|
"grad_norm": 2.2974698543548584, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 0.6805, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 52.63157894736842, |
|
"eval_accuracy": 0.8255746290369508, |
|
"eval_loss": 0.9968159794807434, |
|
"eval_runtime": 0.9748, |
|
"eval_samples_per_second": 409.295, |
|
"eval_steps_per_second": 13.335, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 63.1578947368421, |
|
"grad_norm": 2.481663703918457, |
|
"learning_rate": 2.6666666666666667e-05, |
|
"loss": 0.5937, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 63.1578947368421, |
|
"eval_accuracy": 0.8242695979172693, |
|
"eval_loss": 1.0310105085372925, |
|
"eval_runtime": 0.9734, |
|
"eval_samples_per_second": 409.886, |
|
"eval_steps_per_second": 13.355, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 73.6842105263158, |
|
"grad_norm": 2.574080467224121, |
|
"learning_rate": 2.5555555555555557e-05, |
|
"loss": 0.5258, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 73.6842105263158, |
|
"eval_accuracy": 0.8151164452064581, |
|
"eval_loss": 1.1044955253601074, |
|
"eval_runtime": 0.9728, |
|
"eval_samples_per_second": 410.166, |
|
"eval_steps_per_second": 13.364, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 84.21052631578948, |
|
"grad_norm": 2.3108158111572266, |
|
"learning_rate": 2.4444444444444445e-05, |
|
"loss": 0.4569, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 84.21052631578948, |
|
"eval_accuracy": 0.8254382152687237, |
|
"eval_loss": 1.0392757654190063, |
|
"eval_runtime": 0.9756, |
|
"eval_samples_per_second": 408.981, |
|
"eval_steps_per_second": 13.325, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 94.73684210526316, |
|
"grad_norm": 2.3435771465301514, |
|
"learning_rate": 2.3333333333333336e-05, |
|
"loss": 0.4007, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 94.73684210526316, |
|
"eval_accuracy": 0.8216863990759458, |
|
"eval_loss": 1.0684024095535278, |
|
"eval_runtime": 0.9715, |
|
"eval_samples_per_second": 410.719, |
|
"eval_steps_per_second": 13.382, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 105.26315789473684, |
|
"grad_norm": 4.333491325378418, |
|
"learning_rate": 2.222222222222222e-05, |
|
"loss": 0.3632, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 105.26315789473684, |
|
"eval_accuracy": 0.8181686046511628, |
|
"eval_loss": 1.122326374053955, |
|
"eval_runtime": 0.9742, |
|
"eval_samples_per_second": 409.576, |
|
"eval_steps_per_second": 13.345, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 115.78947368421052, |
|
"grad_norm": 2.059063196182251, |
|
"learning_rate": 2.111111111111111e-05, |
|
"loss": 0.3343, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 115.78947368421052, |
|
"eval_accuracy": 0.8230374174614821, |
|
"eval_loss": 1.1047978401184082, |
|
"eval_runtime": 0.9722, |
|
"eval_samples_per_second": 410.402, |
|
"eval_steps_per_second": 13.371, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 126.3157894736842, |
|
"grad_norm": 2.49455189704895, |
|
"learning_rate": 1.9999999999999998e-05, |
|
"loss": 0.2998, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 126.3157894736842, |
|
"eval_accuracy": 0.8217864923747277, |
|
"eval_loss": 1.0996218919754028, |
|
"eval_runtime": 0.9721, |
|
"eval_samples_per_second": 410.435, |
|
"eval_steps_per_second": 13.373, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 136.8421052631579, |
|
"grad_norm": 2.695810556411743, |
|
"learning_rate": 1.888888888888889e-05, |
|
"loss": 0.2817, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 136.8421052631579, |
|
"eval_accuracy": 0.8320094145336864, |
|
"eval_loss": 1.0879673957824707, |
|
"eval_runtime": 0.9717, |
|
"eval_samples_per_second": 410.637, |
|
"eval_steps_per_second": 13.379, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 147.3684210526316, |
|
"grad_norm": 2.2958600521087646, |
|
"learning_rate": 1.7777777777777777e-05, |
|
"loss": 0.2568, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 147.3684210526316, |
|
"eval_accuracy": 0.821608040201005, |
|
"eval_loss": 1.1188596487045288, |
|
"eval_runtime": 0.9727, |
|
"eval_samples_per_second": 410.205, |
|
"eval_steps_per_second": 13.365, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 157.89473684210526, |
|
"grad_norm": 1.4984138011932373, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 0.2396, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 157.89473684210526, |
|
"eval_accuracy": 0.8267419962335216, |
|
"eval_loss": 1.1026420593261719, |
|
"eval_runtime": 0.9732, |
|
"eval_samples_per_second": 410.004, |
|
"eval_steps_per_second": 13.359, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 168.42105263157896, |
|
"grad_norm": 2.6757421493530273, |
|
"learning_rate": 1.5555555555555555e-05, |
|
"loss": 0.219, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 168.42105263157896, |
|
"eval_accuracy": 0.8240875912408759, |
|
"eval_loss": 1.1283928155899048, |
|
"eval_runtime": 0.9771, |
|
"eval_samples_per_second": 408.349, |
|
"eval_steps_per_second": 13.305, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 178.94736842105263, |
|
"grad_norm": 2.3967316150665283, |
|
"learning_rate": 1.4444444444444444e-05, |
|
"loss": 0.2028, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 178.94736842105263, |
|
"eval_accuracy": 0.8243047508690614, |
|
"eval_loss": 1.1204878091812134, |
|
"eval_runtime": 0.9723, |
|
"eval_samples_per_second": 410.383, |
|
"eval_steps_per_second": 13.371, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 189.47368421052633, |
|
"grad_norm": 1.8826932907104492, |
|
"learning_rate": 1.3333333333333333e-05, |
|
"loss": 0.1927, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 189.47368421052633, |
|
"eval_accuracy": 0.8313253012048193, |
|
"eval_loss": 1.1103968620300293, |
|
"eval_runtime": 0.9708, |
|
"eval_samples_per_second": 411.005, |
|
"eval_steps_per_second": 13.391, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 200.0, |
|
"grad_norm": 9.548158645629883, |
|
"learning_rate": 1.2222222222222222e-05, |
|
"loss": 0.1841, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 200.0, |
|
"eval_accuracy": 0.8348446558644269, |
|
"eval_loss": 1.028389573097229, |
|
"eval_runtime": 0.9722, |
|
"eval_samples_per_second": 410.409, |
|
"eval_steps_per_second": 13.372, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 210.52631578947367, |
|
"grad_norm": 2.206833600997925, |
|
"learning_rate": 1.111111111111111e-05, |
|
"loss": 0.1687, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 210.52631578947367, |
|
"eval_accuracy": 0.8265602322206096, |
|
"eval_loss": 1.166216492652893, |
|
"eval_runtime": 0.9745, |
|
"eval_samples_per_second": 409.461, |
|
"eval_steps_per_second": 13.341, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 221.05263157894737, |
|
"grad_norm": 2.2445051670074463, |
|
"learning_rate": 9.999999999999999e-06, |
|
"loss": 0.1627, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 221.05263157894737, |
|
"eval_accuracy": 0.8278388278388278, |
|
"eval_loss": 1.1330479383468628, |
|
"eval_runtime": 0.9721, |
|
"eval_samples_per_second": 410.471, |
|
"eval_steps_per_second": 13.374, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 231.57894736842104, |
|
"grad_norm": 2.7982735633850098, |
|
"learning_rate": 8.888888888888888e-06, |
|
"loss": 0.1564, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 231.57894736842104, |
|
"eval_accuracy": 0.826530612244898, |
|
"eval_loss": 1.1413230895996094, |
|
"eval_runtime": 0.974, |
|
"eval_samples_per_second": 409.663, |
|
"eval_steps_per_second": 13.347, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 242.10526315789474, |
|
"grad_norm": 2.1092989444732666, |
|
"learning_rate": 7.777777777777777e-06, |
|
"loss": 0.1483, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 242.10526315789474, |
|
"eval_accuracy": 0.8245868367642795, |
|
"eval_loss": 1.1836098432540894, |
|
"eval_runtime": 0.9739, |
|
"eval_samples_per_second": 409.679, |
|
"eval_steps_per_second": 13.348, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 252.6315789473684, |
|
"grad_norm": 2.0988609790802, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 0.1439, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 252.6315789473684, |
|
"eval_accuracy": 0.8179065743944637, |
|
"eval_loss": 1.2169371843338013, |
|
"eval_runtime": 0.9715, |
|
"eval_samples_per_second": 410.688, |
|
"eval_steps_per_second": 13.381, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 263.1578947368421, |
|
"grad_norm": 2.059922218322754, |
|
"learning_rate": 5.555555555555555e-06, |
|
"loss": 0.1396, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 263.1578947368421, |
|
"eval_accuracy": 0.8265973254086181, |
|
"eval_loss": 1.1871376037597656, |
|
"eval_runtime": 0.975, |
|
"eval_samples_per_second": 409.228, |
|
"eval_steps_per_second": 13.333, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 273.6842105263158, |
|
"grad_norm": 1.9513349533081055, |
|
"learning_rate": 4.444444444444444e-06, |
|
"loss": 0.1364, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 273.6842105263158, |
|
"eval_accuracy": 0.8300921187308086, |
|
"eval_loss": 1.1696407794952393, |
|
"eval_runtime": 0.9733, |
|
"eval_samples_per_second": 409.933, |
|
"eval_steps_per_second": 13.356, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 284.2105263157895, |
|
"grad_norm": 1.883595585823059, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"loss": 0.1314, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 284.2105263157895, |
|
"eval_accuracy": 0.8324355801426699, |
|
"eval_loss": 1.1556968688964844, |
|
"eval_runtime": 0.972, |
|
"eval_samples_per_second": 410.49, |
|
"eval_steps_per_second": 13.374, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 294.7368421052632, |
|
"grad_norm": 2.5128650665283203, |
|
"learning_rate": 2.222222222222222e-06, |
|
"loss": 0.1295, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 294.7368421052632, |
|
"eval_accuracy": 0.8297682709447415, |
|
"eval_loss": 1.1712359189987183, |
|
"eval_runtime": 0.9734, |
|
"eval_samples_per_second": 409.885, |
|
"eval_steps_per_second": 13.355, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 305.2631578947368, |
|
"grad_norm": 2.12408447265625, |
|
"learning_rate": 1.111111111111111e-06, |
|
"loss": 0.1296, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 305.2631578947368, |
|
"eval_accuracy": 0.8273445653794543, |
|
"eval_loss": 1.1821348667144775, |
|
"eval_runtime": 0.9718, |
|
"eval_samples_per_second": 410.586, |
|
"eval_steps_per_second": 13.377, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 315.7894736842105, |
|
"grad_norm": 1.681922197341919, |
|
"learning_rate": 0.0, |
|
"loss": 0.1251, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 315.7894736842105, |
|
"eval_accuracy": 0.826234841762792, |
|
"eval_loss": 1.1566745042800903, |
|
"eval_runtime": 0.9729, |
|
"eval_samples_per_second": 410.099, |
|
"eval_steps_per_second": 13.362, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 315.7894736842105, |
|
"step": 30000, |
|
"total_flos": 6.261324507818496e+16, |
|
"train_loss": 0.5399098532358806, |
|
"train_runtime": 6604.7234, |
|
"train_samples_per_second": 145.351, |
|
"train_steps_per_second": 4.542 |
|
} |
|
], |
|
"logging_steps": 1000, |
|
"max_steps": 30000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 316, |
|
"save_steps": 1000, |
|
"total_flos": 6.261324507818496e+16, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|