|
{ |
|
"best_metric": 0.8020833333333334, |
|
"best_model_checkpoint": "/scratch/mrahma45/pixel/finetuned_models/canine/canine-base-finetuned-masakhaner-hau/checkpoint-7000", |
|
"epoch": 133.33333333333334, |
|
"global_step": 8000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 3.33, |
|
"eval_accuracy_score": 0.9178235002478929, |
|
"eval_f1": 0.44129032258064516, |
|
"eval_loss": 0.2975754737854004, |
|
"eval_precision": 0.38212290502793295, |
|
"eval_recall": 0.5221374045801527, |
|
"eval_runtime": 2.1678, |
|
"eval_samples_per_second": 127.318, |
|
"eval_steps_per_second": 16.145, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 6.67, |
|
"eval_accuracy_score": 0.9432325235498265, |
|
"eval_f1": 0.625340599455041, |
|
"eval_loss": 0.21037007868289948, |
|
"eval_precision": 0.5645756457564576, |
|
"eval_recall": 0.7007633587786259, |
|
"eval_runtime": 2.1641, |
|
"eval_samples_per_second": 127.538, |
|
"eval_steps_per_second": 16.173, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 8.33, |
|
"learning_rate": 4.865771812080537e-05, |
|
"loss": 0.3565, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy_score": 0.9494298463063956, |
|
"eval_f1": 0.6824324324324323, |
|
"eval_loss": 0.19267599284648895, |
|
"eval_precision": 0.6121212121212121, |
|
"eval_recall": 0.7709923664122137, |
|
"eval_runtime": 2.1632, |
|
"eval_samples_per_second": 127.587, |
|
"eval_steps_per_second": 16.18, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 13.33, |
|
"eval_accuracy_score": 0.9559990084283589, |
|
"eval_f1": 0.7209631728045326, |
|
"eval_loss": 0.19005168974399567, |
|
"eval_precision": 0.6723910171730515, |
|
"eval_recall": 0.7770992366412214, |
|
"eval_runtime": 2.1631, |
|
"eval_samples_per_second": 127.597, |
|
"eval_steps_per_second": 16.181, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 16.67, |
|
"learning_rate": 4.697986577181208e-05, |
|
"loss": 0.0821, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 16.67, |
|
"eval_accuracy_score": 0.9621963311849281, |
|
"eval_f1": 0.7534148094895758, |
|
"eval_loss": 0.17941339313983917, |
|
"eval_precision": 0.7119565217391305, |
|
"eval_recall": 0.8, |
|
"eval_runtime": 2.1519, |
|
"eval_samples_per_second": 128.257, |
|
"eval_steps_per_second": 16.265, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy_score": 0.9615765989092712, |
|
"eval_f1": 0.7540751240255138, |
|
"eval_loss": 0.18701964616775513, |
|
"eval_precision": 0.7037037037037037, |
|
"eval_recall": 0.8122137404580153, |
|
"eval_runtime": 2.1592, |
|
"eval_samples_per_second": 127.827, |
|
"eval_steps_per_second": 16.21, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 23.33, |
|
"eval_accuracy_score": 0.9639315815567675, |
|
"eval_f1": 0.768892149669846, |
|
"eval_loss": 0.20827054977416992, |
|
"eval_precision": 0.7401129943502824, |
|
"eval_recall": 0.8, |
|
"eval_runtime": 2.1601, |
|
"eval_samples_per_second": 127.772, |
|
"eval_steps_per_second": 16.203, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"learning_rate": 4.530201342281879e-05, |
|
"loss": 0.0161, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 26.67, |
|
"eval_accuracy_score": 0.9636836886465047, |
|
"eval_f1": 0.7759882869692533, |
|
"eval_loss": 0.21991053223609924, |
|
"eval_precision": 0.7454289732770746, |
|
"eval_recall": 0.8091603053435115, |
|
"eval_runtime": 2.1585, |
|
"eval_samples_per_second": 127.866, |
|
"eval_steps_per_second": 16.215, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy_score": 0.9654189390183441, |
|
"eval_f1": 0.7798833819241983, |
|
"eval_loss": 0.20790381729602814, |
|
"eval_precision": 0.7461645746164575, |
|
"eval_recall": 0.816793893129771, |
|
"eval_runtime": 2.1597, |
|
"eval_samples_per_second": 127.793, |
|
"eval_steps_per_second": 16.206, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 33.33, |
|
"learning_rate": 4.36241610738255e-05, |
|
"loss": 0.0063, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 33.33, |
|
"eval_accuracy_score": 0.964427367377293, |
|
"eval_f1": 0.7772241992882563, |
|
"eval_loss": 0.22246131300926208, |
|
"eval_precision": 0.728, |
|
"eval_recall": 0.833587786259542, |
|
"eval_runtime": 2.1499, |
|
"eval_samples_per_second": 128.377, |
|
"eval_steps_per_second": 16.28, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 36.67, |
|
"eval_accuracy_score": 0.9641794744670302, |
|
"eval_f1": 0.7784342688330871, |
|
"eval_loss": 0.2224682867527008, |
|
"eval_precision": 0.753934191702432, |
|
"eval_recall": 0.8045801526717558, |
|
"eval_runtime": 2.1602, |
|
"eval_samples_per_second": 127.764, |
|
"eval_steps_per_second": 16.202, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy_score": 0.9669062964799207, |
|
"eval_f1": 0.7894736842105263, |
|
"eval_loss": 0.21754750609397888, |
|
"eval_precision": 0.7573632538569425, |
|
"eval_recall": 0.8244274809160306, |
|
"eval_runtime": 2.1638, |
|
"eval_samples_per_second": 127.555, |
|
"eval_steps_per_second": 16.175, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 41.67, |
|
"learning_rate": 4.194630872483222e-05, |
|
"loss": 0.0036, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 43.33, |
|
"eval_accuracy_score": 0.9652949925632127, |
|
"eval_f1": 0.7895500725689405, |
|
"eval_loss": 0.23659802973270416, |
|
"eval_precision": 0.7524204702627939, |
|
"eval_recall": 0.8305343511450382, |
|
"eval_runtime": 2.1523, |
|
"eval_samples_per_second": 128.234, |
|
"eval_steps_per_second": 16.262, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 46.67, |
|
"eval_accuracy_score": 0.9669062964799207, |
|
"eval_f1": 0.7883636363636363, |
|
"eval_loss": 0.2335628867149353, |
|
"eval_precision": 0.7527777777777778, |
|
"eval_recall": 0.8274809160305343, |
|
"eval_runtime": 2.1466, |
|
"eval_samples_per_second": 128.574, |
|
"eval_steps_per_second": 16.305, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"learning_rate": 4.026845637583892e-05, |
|
"loss": 0.0028, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy_score": 0.9654189390183441, |
|
"eval_f1": 0.7888970051132214, |
|
"eval_loss": 0.24077844619750977, |
|
"eval_precision": 0.7563025210084033, |
|
"eval_recall": 0.8244274809160306, |
|
"eval_runtime": 2.1522, |
|
"eval_samples_per_second": 128.243, |
|
"eval_steps_per_second": 16.263, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 53.33, |
|
"eval_accuracy_score": 0.9655428854734754, |
|
"eval_f1": 0.7809948032665183, |
|
"eval_loss": 0.2439465969800949, |
|
"eval_precision": 0.7601156069364162, |
|
"eval_recall": 0.8030534351145038, |
|
"eval_runtime": 2.1578, |
|
"eval_samples_per_second": 127.91, |
|
"eval_steps_per_second": 16.22, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 56.67, |
|
"eval_accuracy_score": 0.9659147248388696, |
|
"eval_f1": 0.7880597014925373, |
|
"eval_loss": 0.2444428950548172, |
|
"eval_precision": 0.7708029197080292, |
|
"eval_recall": 0.8061068702290076, |
|
"eval_runtime": 2.1536, |
|
"eval_samples_per_second": 128.155, |
|
"eval_steps_per_second": 16.252, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 58.33, |
|
"learning_rate": 3.859060402684564e-05, |
|
"loss": 0.0019, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy_score": 0.9656668319286068, |
|
"eval_f1": 0.7766423357664234, |
|
"eval_loss": 0.24506314098834991, |
|
"eval_precision": 0.7440559440559441, |
|
"eval_recall": 0.8122137404580153, |
|
"eval_runtime": 2.1569, |
|
"eval_samples_per_second": 127.959, |
|
"eval_steps_per_second": 16.227, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 63.33, |
|
"eval_accuracy_score": 0.9675260287555776, |
|
"eval_f1": 0.8005908419497784, |
|
"eval_loss": 0.2420436143875122, |
|
"eval_precision": 0.7753934191702432, |
|
"eval_recall": 0.8274809160305343, |
|
"eval_runtime": 2.151, |
|
"eval_samples_per_second": 128.313, |
|
"eval_steps_per_second": 16.272, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 66.67, |
|
"learning_rate": 3.6912751677852356e-05, |
|
"loss": 0.0024, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 66.67, |
|
"eval_accuracy_score": 0.9659147248388696, |
|
"eval_f1": 0.7849779086892489, |
|
"eval_loss": 0.251669317483902, |
|
"eval_precision": 0.7581792318634424, |
|
"eval_recall": 0.8137404580152672, |
|
"eval_runtime": 2.1475, |
|
"eval_samples_per_second": 128.521, |
|
"eval_steps_per_second": 16.298, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_accuracy_score": 0.9652949925632127, |
|
"eval_f1": 0.7835051546391752, |
|
"eval_loss": 0.25008824467658997, |
|
"eval_precision": 0.7567567567567568, |
|
"eval_recall": 0.8122137404580153, |
|
"eval_runtime": 2.1537, |
|
"eval_samples_per_second": 128.15, |
|
"eval_steps_per_second": 16.251, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 73.33, |
|
"eval_accuracy_score": 0.9656668319286068, |
|
"eval_f1": 0.7784256559766763, |
|
"eval_loss": 0.24835215508937836, |
|
"eval_precision": 0.7447698744769874, |
|
"eval_recall": 0.815267175572519, |
|
"eval_runtime": 2.15, |
|
"eval_samples_per_second": 128.372, |
|
"eval_steps_per_second": 16.279, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"learning_rate": 3.523489932885906e-05, |
|
"loss": 0.002, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 76.67, |
|
"eval_accuracy_score": 0.9655428854734754, |
|
"eval_f1": 0.777859778597786, |
|
"eval_loss": 0.26140472292900085, |
|
"eval_precision": 0.7528571428571429, |
|
"eval_recall": 0.8045801526717558, |
|
"eval_runtime": 2.1498, |
|
"eval_samples_per_second": 128.383, |
|
"eval_steps_per_second": 16.28, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy_score": 0.9669062964799207, |
|
"eval_f1": 0.7905951506245408, |
|
"eval_loss": 0.24271249771118164, |
|
"eval_precision": 0.7620396600566572, |
|
"eval_recall": 0.8213740458015267, |
|
"eval_runtime": 2.1506, |
|
"eval_samples_per_second": 128.336, |
|
"eval_steps_per_second": 16.275, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 83.33, |
|
"learning_rate": 3.3557046979865775e-05, |
|
"loss": 0.0012, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 83.33, |
|
"eval_accuracy_score": 0.9664105106593951, |
|
"eval_f1": 0.777209642074507, |
|
"eval_loss": 0.25782376527786255, |
|
"eval_precision": 0.7450980392156863, |
|
"eval_recall": 0.8122137404580153, |
|
"eval_runtime": 2.1506, |
|
"eval_samples_per_second": 128.335, |
|
"eval_steps_per_second": 16.274, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 86.67, |
|
"eval_accuracy_score": 0.967649975210709, |
|
"eval_f1": 0.7958579881656805, |
|
"eval_loss": 0.25114643573760986, |
|
"eval_precision": 0.7718794835007173, |
|
"eval_recall": 0.8213740458015267, |
|
"eval_runtime": 2.1502, |
|
"eval_samples_per_second": 128.359, |
|
"eval_steps_per_second": 16.277, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"eval_accuracy_score": 0.9675260287555776, |
|
"eval_f1": 0.7870302137067059, |
|
"eval_loss": 0.25407665967941284, |
|
"eval_precision": 0.7606837606837606, |
|
"eval_recall": 0.815267175572519, |
|
"eval_runtime": 2.1494, |
|
"eval_samples_per_second": 128.408, |
|
"eval_steps_per_second": 16.284, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 91.67, |
|
"learning_rate": 3.1879194630872485e-05, |
|
"loss": 0.0011, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 93.33, |
|
"eval_accuracy_score": 0.9671541893901835, |
|
"eval_f1": 0.8008817046289494, |
|
"eval_loss": 0.2622869312763214, |
|
"eval_precision": 0.7719546742209632, |
|
"eval_recall": 0.8320610687022901, |
|
"eval_runtime": 2.1439, |
|
"eval_samples_per_second": 128.74, |
|
"eval_steps_per_second": 16.326, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 96.67, |
|
"eval_accuracy_score": 0.9691373326722855, |
|
"eval_f1": 0.8053293856402666, |
|
"eval_loss": 0.25844594836235046, |
|
"eval_precision": 0.7816091954022989, |
|
"eval_recall": 0.8305343511450382, |
|
"eval_runtime": 2.1401, |
|
"eval_samples_per_second": 128.969, |
|
"eval_steps_per_second": 16.355, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"learning_rate": 3.02013422818792e-05, |
|
"loss": 0.0013, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_accuracy_score": 0.9682697074863659, |
|
"eval_f1": 0.7958732498157702, |
|
"eval_loss": 0.25458037853240967, |
|
"eval_precision": 0.7692307692307693, |
|
"eval_recall": 0.8244274809160306, |
|
"eval_runtime": 2.1443, |
|
"eval_samples_per_second": 128.714, |
|
"eval_steps_per_second": 16.322, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 103.33, |
|
"eval_accuracy_score": 0.9678978681209718, |
|
"eval_f1": 0.7973273942093543, |
|
"eval_loss": 0.25813519954681396, |
|
"eval_precision": 0.7760115606936416, |
|
"eval_recall": 0.8198473282442749, |
|
"eval_runtime": 2.1415, |
|
"eval_samples_per_second": 128.885, |
|
"eval_steps_per_second": 16.344, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 106.67, |
|
"eval_accuracy_score": 0.9687654933068914, |
|
"eval_f1": 0.795234549516009, |
|
"eval_loss": 0.2546005845069885, |
|
"eval_precision": 0.7761627906976745, |
|
"eval_recall": 0.815267175572519, |
|
"eval_runtime": 2.1444, |
|
"eval_samples_per_second": 128.705, |
|
"eval_steps_per_second": 16.321, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 108.33, |
|
"learning_rate": 2.8523489932885905e-05, |
|
"loss": 0.0011, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 110.0, |
|
"eval_accuracy_score": 0.967649975210709, |
|
"eval_f1": 0.796711509715994, |
|
"eval_loss": 0.2577730715274811, |
|
"eval_precision": 0.780380673499268, |
|
"eval_recall": 0.8137404580152672, |
|
"eval_runtime": 2.1446, |
|
"eval_samples_per_second": 128.696, |
|
"eval_steps_per_second": 16.32, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 113.33, |
|
"eval_accuracy_score": 0.9678978681209718, |
|
"eval_f1": 0.8061901252763449, |
|
"eval_loss": 0.24760644137859344, |
|
"eval_precision": 0.7792022792022792, |
|
"eval_recall": 0.8351145038167939, |
|
"eval_runtime": 2.1452, |
|
"eval_samples_per_second": 128.66, |
|
"eval_steps_per_second": 16.316, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 116.67, |
|
"learning_rate": 2.6845637583892618e-05, |
|
"loss": 0.0007, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 116.67, |
|
"eval_accuracy_score": 0.9688894397620228, |
|
"eval_f1": 0.8020833333333334, |
|
"eval_loss": 0.25953975319862366, |
|
"eval_precision": 0.7822931785195936, |
|
"eval_recall": 0.8229007633587786, |
|
"eval_runtime": 2.1496, |
|
"eval_samples_per_second": 128.395, |
|
"eval_steps_per_second": 16.282, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 120.0, |
|
"eval_accuracy_score": 0.9687654933068914, |
|
"eval_f1": 0.7976278724981469, |
|
"eval_loss": 0.2510833740234375, |
|
"eval_precision": 0.7752161383285303, |
|
"eval_recall": 0.8213740458015267, |
|
"eval_runtime": 2.1454, |
|
"eval_samples_per_second": 128.649, |
|
"eval_steps_per_second": 16.314, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 123.33, |
|
"eval_accuracy_score": 0.9681457610312345, |
|
"eval_f1": 0.7857672349888806, |
|
"eval_loss": 0.26905274391174316, |
|
"eval_precision": 0.7636887608069164, |
|
"eval_recall": 0.8091603053435115, |
|
"eval_runtime": 2.1419, |
|
"eval_samples_per_second": 128.86, |
|
"eval_steps_per_second": 16.341, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 125.0, |
|
"learning_rate": 2.516778523489933e-05, |
|
"loss": 0.0006, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 126.67, |
|
"eval_accuracy_score": 0.9682697074863659, |
|
"eval_f1": 0.7914201183431951, |
|
"eval_loss": 0.26672181487083435, |
|
"eval_precision": 0.7675753228120517, |
|
"eval_recall": 0.816793893129771, |
|
"eval_runtime": 2.1398, |
|
"eval_samples_per_second": 128.981, |
|
"eval_steps_per_second": 16.356, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 130.0, |
|
"eval_accuracy_score": 0.9686415468517601, |
|
"eval_f1": 0.7979351032448379, |
|
"eval_loss": 0.2618446350097656, |
|
"eval_precision": 0.7717546362339515, |
|
"eval_recall": 0.8259541984732824, |
|
"eval_runtime": 2.1453, |
|
"eval_samples_per_second": 128.65, |
|
"eval_steps_per_second": 16.314, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 133.33, |
|
"learning_rate": 2.348993288590604e-05, |
|
"loss": 0.0006, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 133.33, |
|
"eval_accuracy_score": 0.967030242935052, |
|
"eval_f1": 0.7997086671522214, |
|
"eval_loss": 0.27878984808921814, |
|
"eval_precision": 0.7646239554317549, |
|
"eval_recall": 0.8381679389312977, |
|
"eval_runtime": 2.1418, |
|
"eval_samples_per_second": 128.862, |
|
"eval_steps_per_second": 16.341, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 133.33, |
|
"step": 8000, |
|
"total_flos": 4.186468556207309e+16, |
|
"train_loss": 0.030014565151184797, |
|
"train_runtime": 3978.2234, |
|
"train_samples_per_second": 120.657, |
|
"train_steps_per_second": 3.771 |
|
} |
|
], |
|
"max_steps": 15000, |
|
"num_train_epochs": 250, |
|
"total_flos": 4.186468556207309e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|