{
  "best_metric": 0.1180819422006607,
  "best_model_checkpoint": "CXR-Classifier/checkpoint-1224",
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 1224,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.05,
      "grad_norm": 3.592426061630249,
      "learning_rate": 8.130081300813009e-06,
      "loss": 0.5972,
      "step": 20
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.819566249847412,
      "learning_rate": 1.6260162601626018e-05,
      "loss": 0.4976,
      "step": 40
    },
    {
      "epoch": 0.15,
      "grad_norm": 5.789632320404053,
      "learning_rate": 2.4390243902439026e-05,
      "loss": 0.3321,
      "step": 60
    },
    {
      "epoch": 0.2,
      "grad_norm": 5.508607864379883,
      "learning_rate": 3.2520325203252037e-05,
      "loss": 0.415,
      "step": 80
    },
    {
      "epoch": 0.25,
      "grad_norm": 7.562315464019775,
      "learning_rate": 4.065040650406504e-05,
      "loss": 0.2412,
      "step": 100
    },
    {
      "epoch": 0.29,
      "grad_norm": 4.414723873138428,
      "learning_rate": 4.878048780487805e-05,
      "loss": 0.3456,
      "step": 120
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.0423896312713623,
      "learning_rate": 4.922797456857402e-05,
      "loss": 0.2415,
      "step": 140
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.041806221008301,
      "learning_rate": 4.83197093551317e-05,
      "loss": 0.3546,
      "step": 160
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6937503814697266,
      "learning_rate": 4.741144414168938e-05,
      "loss": 0.3947,
      "step": 180
    },
    {
      "epoch": 0.49,
      "grad_norm": 6.492763996124268,
      "learning_rate": 4.650317892824705e-05,
      "loss": 0.3063,
      "step": 200
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.9708950519561768,
      "learning_rate": 4.559491371480473e-05,
      "loss": 0.3115,
      "step": 220
    },
    {
      "epoch": 0.59,
      "grad_norm": 12.533012390136719,
      "learning_rate": 4.46866485013624e-05,
      "loss": 0.5087,
      "step": 240
    },
    {
      "epoch": 0.64,
      "grad_norm": 8.02456283569336,
      "learning_rate": 4.377838328792008e-05,
      "loss": 0.2745,
      "step": 260
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0878229141235352,
      "learning_rate": 4.287011807447775e-05,
      "loss": 0.1905,
      "step": 280
    },
    {
      "epoch": 0.74,
      "grad_norm": 7.465769290924072,
      "learning_rate": 4.196185286103542e-05,
      "loss": 0.2509,
      "step": 300
    },
    {
      "epoch": 0.78,
      "grad_norm": 15.646003723144531,
      "learning_rate": 4.10535876475931e-05,
      "loss": 0.4353,
      "step": 320
    },
    {
      "epoch": 0.83,
      "grad_norm": 3.2481565475463867,
      "learning_rate": 4.014532243415077e-05,
      "loss": 0.3478,
      "step": 340
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.395519733428955,
      "learning_rate": 3.923705722070845e-05,
      "loss": 0.2199,
      "step": 360
    },
    {
      "epoch": 0.93,
      "grad_norm": 8.089118003845215,
      "learning_rate": 3.832879200726612e-05,
      "loss": 0.2715,
      "step": 380
    },
    {
      "epoch": 0.98,
      "grad_norm": 8.150867462158203,
      "learning_rate": 3.74205267938238e-05,
      "loss": 0.2074,
      "step": 400
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.9387254901960784,
      "eval_auc": 0.9766835240883684,
      "eval_f1": 0.957841483979764,
      "eval_loss": 0.2350389063358307,
      "eval_precision": 0.961082910321489,
      "eval_recall": 0.9546218487394958,
      "eval_runtime": 246.6561,
      "eval_samples_per_second": 3.308,
      "eval_steps_per_second": 0.207,
      "step": 408
    },
    {
      "epoch": 1.03,
      "grad_norm": 6.791078090667725,
      "learning_rate": 3.651226158038147e-05,
      "loss": 0.1235,
      "step": 420
    },
    {
      "epoch": 1.08,
      "grad_norm": 5.592333793640137,
      "learning_rate": 3.560399636693915e-05,
      "loss": 0.4199,
      "step": 440
    },
    {
      "epoch": 1.13,
      "grad_norm": 0.2713923752307892,
      "learning_rate": 3.469573115349682e-05,
      "loss": 0.3119,
      "step": 460
    },
    {
      "epoch": 1.18,
      "grad_norm": 5.907072067260742,
      "learning_rate": 3.37874659400545e-05,
      "loss": 0.2118,
      "step": 480
    },
    {
      "epoch": 1.23,
      "grad_norm": 0.9097113013267517,
      "learning_rate": 3.287920072661217e-05,
      "loss": 0.2174,
      "step": 500
    },
    {
      "epoch": 1.27,
      "grad_norm": 6.9212141036987305,
      "learning_rate": 3.197093551316985e-05,
      "loss": 0.2448,
      "step": 520
    },
    {
      "epoch": 1.32,
      "grad_norm": 6.113616466522217,
      "learning_rate": 3.106267029972752e-05,
      "loss": 0.1619,
      "step": 540
    },
    {
      "epoch": 1.37,
      "grad_norm": 0.9741531014442444,
      "learning_rate": 3.0154405086285197e-05,
      "loss": 0.3296,
      "step": 560
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.604313611984253,
      "learning_rate": 2.924613987284287e-05,
      "loss": 0.1598,
      "step": 580
    },
    {
      "epoch": 1.47,
      "grad_norm": 5.160298824310303,
      "learning_rate": 2.8337874659400547e-05,
      "loss": 0.2605,
      "step": 600
    },
    {
      "epoch": 1.52,
      "grad_norm": 7.961933135986328,
      "learning_rate": 2.7429609445958222e-05,
      "loss": 0.295,
      "step": 620
    },
    {
      "epoch": 1.57,
      "grad_norm": 3.545825719833374,
      "learning_rate": 2.6521344232515894e-05,
      "loss": 0.2613,
      "step": 640
    },
    {
      "epoch": 1.62,
      "grad_norm": 0.7656643390655518,
      "learning_rate": 2.5613079019073572e-05,
      "loss": 0.1684,
      "step": 660
    },
    {
      "epoch": 1.67,
      "grad_norm": 14.269344329833984,
      "learning_rate": 2.4704813805631247e-05,
      "loss": 0.3285,
      "step": 680
    },
    {
      "epoch": 1.72,
      "grad_norm": 0.21142134070396423,
      "learning_rate": 2.379654859218892e-05,
      "loss": 0.2071,
      "step": 700
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.0282666683197021,
      "learning_rate": 2.2888283378746594e-05,
      "loss": 0.2701,
      "step": 720
    },
    {
      "epoch": 1.81,
      "grad_norm": 12.365777969360352,
      "learning_rate": 2.198001816530427e-05,
      "loss": 0.1753,
      "step": 740
    },
    {
      "epoch": 1.86,
      "grad_norm": 6.909509181976318,
      "learning_rate": 2.1071752951861944e-05,
      "loss": 0.185,
      "step": 760
    },
    {
      "epoch": 1.91,
      "grad_norm": 10.059576034545898,
      "learning_rate": 2.016348773841962e-05,
      "loss": 0.1403,
      "step": 780
    },
    {
      "epoch": 1.96,
      "grad_norm": 13.194554328918457,
      "learning_rate": 1.9255222524977297e-05,
      "loss": 0.177,
      "step": 800
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.9522058823529411,
      "eval_auc": 0.9864329442184113,
      "eval_f1": 0.967418546365915,
      "eval_loss": 0.15405645966529846,
      "eval_precision": 0.9617940199335548,
      "eval_recall": 0.973109243697479,
      "eval_runtime": 257.0506,
      "eval_samples_per_second": 3.174,
      "eval_steps_per_second": 0.198,
      "step": 816
    },
    {
      "epoch": 2.01,
      "grad_norm": 0.45505988597869873,
      "learning_rate": 1.834695731153497e-05,
      "loss": 0.1334,
      "step": 820
    },
    {
      "epoch": 2.06,
      "grad_norm": 0.5608593821525574,
      "learning_rate": 1.7438692098092644e-05,
      "loss": 0.1801,
      "step": 840
    },
    {
      "epoch": 2.11,
      "grad_norm": 1.9215396642684937,
      "learning_rate": 1.653042688465032e-05,
      "loss": 0.1397,
      "step": 860
    },
    {
      "epoch": 2.16,
      "grad_norm": 0.03459596261382103,
      "learning_rate": 1.5622161671207994e-05,
      "loss": 0.0797,
      "step": 880
    },
    {
      "epoch": 2.21,
      "grad_norm": 4.931589603424072,
      "learning_rate": 1.4713896457765669e-05,
      "loss": 0.1547,
      "step": 900
    },
    {
      "epoch": 2.25,
      "grad_norm": 12.403867721557617,
      "learning_rate": 1.3805631244323344e-05,
      "loss": 0.1008,
      "step": 920
    },
    {
      "epoch": 2.3,
      "grad_norm": 6.834578514099121,
      "learning_rate": 1.2897366030881017e-05,
      "loss": 0.3086,
      "step": 940
    },
    {
      "epoch": 2.35,
      "grad_norm": 0.12356822937726974,
      "learning_rate": 1.1989100817438692e-05,
      "loss": 0.1367,
      "step": 960
    },
    {
      "epoch": 2.4,
      "grad_norm": 0.23836758732795715,
      "learning_rate": 1.1080835603996367e-05,
      "loss": 0.1204,
      "step": 980
    },
    {
      "epoch": 2.45,
      "grad_norm": 0.645460307598114,
      "learning_rate": 1.0172570390554042e-05,
      "loss": 0.2857,
      "step": 1000
    },
    {
      "epoch": 2.5,
      "grad_norm": 6.155028820037842,
      "learning_rate": 9.264305177111717e-06,
      "loss": 0.1514,
      "step": 1020
    },
    {
      "epoch": 2.55,
      "grad_norm": 6.625197410583496,
      "learning_rate": 8.356039963669392e-06,
      "loss": 0.1973,
      "step": 1040
    },
    {
      "epoch": 2.6,
      "grad_norm": 0.4476400911808014,
      "learning_rate": 7.447774750227067e-06,
      "loss": 0.1153,
      "step": 1060
    },
    {
      "epoch": 2.65,
      "grad_norm": 11.432110786437988,
      "learning_rate": 6.539509536784741e-06,
      "loss": 0.1943,
      "step": 1080
    },
    {
      "epoch": 2.7,
      "grad_norm": 6.038093090057373,
      "learning_rate": 5.631244323342416e-06,
      "loss": 0.0998,
      "step": 1100
    },
    {
      "epoch": 2.75,
      "grad_norm": 0.24591827392578125,
      "learning_rate": 4.722979109900091e-06,
      "loss": 0.1767,
      "step": 1120
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.9476640224456787,
      "learning_rate": 3.814713896457766e-06,
      "loss": 0.1798,
      "step": 1140
    },
    {
      "epoch": 2.84,
      "grad_norm": 9.382974624633789,
      "learning_rate": 2.9064486830154405e-06,
      "loss": 0.1707,
      "step": 1160
    },
    {
      "epoch": 2.89,
      "grad_norm": 0.10719335079193115,
      "learning_rate": 1.9981834695731155e-06,
      "loss": 0.2662,
      "step": 1180
    },
    {
      "epoch": 2.94,
      "grad_norm": 10.07032299041748,
      "learning_rate": 1.0899182561307902e-06,
      "loss": 0.218,
      "step": 1200
    },
    {
      "epoch": 2.99,
      "grad_norm": 17.199472427368164,
      "learning_rate": 1.8165304268846503e-07,
      "loss": 0.1692,
      "step": 1220
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.9644607843137255,
      "eval_auc": 0.9916270580630442,
      "eval_f1": 0.9755686604886269,
      "eval_loss": 0.1180819422006607,
      "eval_precision": 0.9780405405405406,
      "eval_recall": 0.973109243697479,
      "eval_runtime": 252.4161,
      "eval_samples_per_second": 3.233,
      "eval_steps_per_second": 0.202,
      "step": 1224
    }
  ],
  "logging_steps": 20,
  "max_steps": 1224,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "total_flos": 7.581041343995535e+17,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}