|
{ |
|
"best_metric": 0.2802155911922455, |
|
"best_model_checkpoint": "autotrain-swinv2-tiny-patch4-window8-256/checkpoint-4386", |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 4386, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01709986320109439, |
|
"grad_norm": 16.21043586730957, |
|
"learning_rate": 7.86593707250342e-07, |
|
"loss": 2.7812, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.03419972640218878, |
|
"grad_norm": 21.944915771484375, |
|
"learning_rate": 1.6415868673050617e-06, |
|
"loss": 2.6916, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.05129958960328317, |
|
"grad_norm": 15.82861328125, |
|
"learning_rate": 2.496580027359781e-06, |
|
"loss": 2.5256, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.06839945280437756, |
|
"grad_norm": 28.86319351196289, |
|
"learning_rate": 3.3515731874145007e-06, |
|
"loss": 2.2838, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.08549931600547196, |
|
"grad_norm": 23.919939041137695, |
|
"learning_rate": 4.172366621067032e-06, |
|
"loss": 1.993, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.10259917920656635, |
|
"grad_norm": 22.423189163208008, |
|
"learning_rate": 5.027359781121752e-06, |
|
"loss": 1.9762, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.11969904240766074, |
|
"grad_norm": 18.18659210205078, |
|
"learning_rate": 5.882352941176471e-06, |
|
"loss": 1.6428, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.13679890560875513, |
|
"grad_norm": 25.3078670501709, |
|
"learning_rate": 6.73734610123119e-06, |
|
"loss": 1.6677, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.1538987688098495, |
|
"grad_norm": 33.7442626953125, |
|
"learning_rate": 7.592339261285911e-06, |
|
"loss": 1.5048, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.17099863201094392, |
|
"grad_norm": 44.62141799926758, |
|
"learning_rate": 8.44733242134063e-06, |
|
"loss": 1.3579, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.1880984952120383, |
|
"grad_norm": 47.94766616821289, |
|
"learning_rate": 9.26812585499316e-06, |
|
"loss": 1.3141, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.2051983584131327, |
|
"grad_norm": 50.67353820800781, |
|
"learning_rate": 1.0123119015047879e-05, |
|
"loss": 1.5654, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.22229822161422708, |
|
"grad_norm": 34.55750274658203, |
|
"learning_rate": 1.09781121751026e-05, |
|
"loss": 1.3736, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.2393980848153215, |
|
"grad_norm": 29.00438117980957, |
|
"learning_rate": 1.183310533515732e-05, |
|
"loss": 1.274, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.25649794801641584, |
|
"grad_norm": 24.081707000732422, |
|
"learning_rate": 1.2688098495212038e-05, |
|
"loss": 1.2503, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.27359781121751026, |
|
"grad_norm": 23.609891891479492, |
|
"learning_rate": 1.354309165526676e-05, |
|
"loss": 1.0883, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.29069767441860467, |
|
"grad_norm": 18.259571075439453, |
|
"learning_rate": 1.4398084815321477e-05, |
|
"loss": 1.1379, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.307797537619699, |
|
"grad_norm": 42.96183395385742, |
|
"learning_rate": 1.5253077975376198e-05, |
|
"loss": 1.2596, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.32489740082079344, |
|
"grad_norm": 37.27230453491211, |
|
"learning_rate": 1.6108071135430915e-05, |
|
"loss": 1.0158, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.34199726402188785, |
|
"grad_norm": 113.23546600341797, |
|
"learning_rate": 1.6963064295485636e-05, |
|
"loss": 1.0687, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.3590971272229822, |
|
"grad_norm": 19.023685455322266, |
|
"learning_rate": 1.7818057455540357e-05, |
|
"loss": 0.9566, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.3761969904240766, |
|
"grad_norm": 29.44492530822754, |
|
"learning_rate": 1.8673050615595075e-05, |
|
"loss": 1.128, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.393296853625171, |
|
"grad_norm": 42.041595458984375, |
|
"learning_rate": 1.9528043775649796e-05, |
|
"loss": 1.0133, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.4103967168262654, |
|
"grad_norm": 47.55967712402344, |
|
"learning_rate": 2.0383036935704516e-05, |
|
"loss": 1.0888, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.4274965800273598, |
|
"grad_norm": 12.591029167175293, |
|
"learning_rate": 2.1238030095759234e-05, |
|
"loss": 0.9936, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.44459644322845415, |
|
"grad_norm": 31.012723922729492, |
|
"learning_rate": 2.2093023255813955e-05, |
|
"loss": 0.9765, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.46169630642954856, |
|
"grad_norm": 39.08427047729492, |
|
"learning_rate": 2.2948016415868672e-05, |
|
"loss": 0.9398, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.478796169630643, |
|
"grad_norm": 34.639007568359375, |
|
"learning_rate": 2.3803009575923393e-05, |
|
"loss": 0.7951, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.49589603283173733, |
|
"grad_norm": 40.917171478271484, |
|
"learning_rate": 2.4658002735978114e-05, |
|
"loss": 0.7592, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.5129958960328317, |
|
"grad_norm": 27.42568016052246, |
|
"learning_rate": 2.5512995896032832e-05, |
|
"loss": 0.8023, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.5300957592339262, |
|
"grad_norm": 10.912271499633789, |
|
"learning_rate": 2.6367989056087556e-05, |
|
"loss": 0.832, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.5471956224350205, |
|
"grad_norm": 35.33407211303711, |
|
"learning_rate": 2.7222982216142274e-05, |
|
"loss": 0.8747, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.5642954856361149, |
|
"grad_norm": 23.503469467163086, |
|
"learning_rate": 2.807797537619699e-05, |
|
"loss": 0.9251, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.5813953488372093, |
|
"grad_norm": 28.509077072143555, |
|
"learning_rate": 2.893296853625171e-05, |
|
"loss": 0.9039, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.5984952120383037, |
|
"grad_norm": 20.486900329589844, |
|
"learning_rate": 2.9787961696306433e-05, |
|
"loss": 0.9576, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.615595075239398, |
|
"grad_norm": 26.435588836669922, |
|
"learning_rate": 3.064295485636115e-05, |
|
"loss": 0.8952, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.6326949384404925, |
|
"grad_norm": 25.732032775878906, |
|
"learning_rate": 3.149794801641587e-05, |
|
"loss": 0.8222, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.6497948016415869, |
|
"grad_norm": 33.15768051147461, |
|
"learning_rate": 3.235294117647059e-05, |
|
"loss": 0.9005, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.6668946648426812, |
|
"grad_norm": 35.35673141479492, |
|
"learning_rate": 3.3207934336525306e-05, |
|
"loss": 1.081, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.6839945280437757, |
|
"grad_norm": 46.90694046020508, |
|
"learning_rate": 3.406292749658003e-05, |
|
"loss": 0.7733, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.70109439124487, |
|
"grad_norm": 26.313335418701172, |
|
"learning_rate": 3.491792065663475e-05, |
|
"loss": 0.919, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.7181942544459644, |
|
"grad_norm": 44.662662506103516, |
|
"learning_rate": 3.577291381668947e-05, |
|
"loss": 0.6723, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.7352941176470589, |
|
"grad_norm": 6.602749347686768, |
|
"learning_rate": 3.662790697674418e-05, |
|
"loss": 0.7601, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.7523939808481532, |
|
"grad_norm": 38.393550872802734, |
|
"learning_rate": 3.748290013679891e-05, |
|
"loss": 0.748, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.7694938440492476, |
|
"grad_norm": 11.72321891784668, |
|
"learning_rate": 3.8337893296853625e-05, |
|
"loss": 0.7516, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.786593707250342, |
|
"grad_norm": 11.254487991333008, |
|
"learning_rate": 3.9192886456908346e-05, |
|
"loss": 0.7186, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.8036935704514364, |
|
"grad_norm": 44.11043930053711, |
|
"learning_rate": 4.004787961696307e-05, |
|
"loss": 0.7565, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.8207934336525308, |
|
"grad_norm": 11.987720489501953, |
|
"learning_rate": 4.090287277701779e-05, |
|
"loss": 0.7304, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.8378932968536251, |
|
"grad_norm": 17.10840606689453, |
|
"learning_rate": 4.17578659370725e-05, |
|
"loss": 0.8912, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.8549931600547196, |
|
"grad_norm": 19.49997901916504, |
|
"learning_rate": 4.261285909712722e-05, |
|
"loss": 0.9355, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.872093023255814, |
|
"grad_norm": 12.431605339050293, |
|
"learning_rate": 4.3467852257181944e-05, |
|
"loss": 0.7146, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.8891928864569083, |
|
"grad_norm": 20.540786743164062, |
|
"learning_rate": 4.4322845417236665e-05, |
|
"loss": 0.8042, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.9062927496580028, |
|
"grad_norm": 28.939634323120117, |
|
"learning_rate": 4.517783857729138e-05, |
|
"loss": 0.7885, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.9233926128590971, |
|
"grad_norm": 27.024660110473633, |
|
"learning_rate": 4.6032831737346106e-05, |
|
"loss": 0.7953, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.9404924760601915, |
|
"grad_norm": 47.79359436035156, |
|
"learning_rate": 4.688782489740082e-05, |
|
"loss": 0.759, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.957592339261286, |
|
"grad_norm": 18.608360290527344, |
|
"learning_rate": 4.774281805745554e-05, |
|
"loss": 0.7392, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.9746922024623803, |
|
"grad_norm": 16.670150756835938, |
|
"learning_rate": 4.859781121751026e-05, |
|
"loss": 0.664, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 0.9917920656634747, |
|
"grad_norm": 21.591880798339844, |
|
"learning_rate": 4.945280437756498e-05, |
|
"loss": 0.7628, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7722772277227723, |
|
"eval_f1_macro": 0.3963476960209859, |
|
"eval_f1_micro": 0.7722772277227723, |
|
"eval_f1_weighted": 0.7524459692668548, |
|
"eval_loss": 0.6640351414680481, |
|
"eval_precision_macro": 0.5662665685743159, |
|
"eval_precision_micro": 0.7722772277227723, |
|
"eval_precision_weighted": 0.8150598854310834, |
|
"eval_recall_macro": 0.4019843036358822, |
|
"eval_recall_micro": 0.7722772277227723, |
|
"eval_recall_weighted": 0.7722772277227723, |
|
"eval_runtime": 19.424, |
|
"eval_samples_per_second": 150.793, |
|
"eval_steps_per_second": 9.473, |
|
"step": 1462 |
|
}, |
|
{ |
|
"epoch": 1.008891928864569, |
|
"grad_norm": 18.68697738647461, |
|
"learning_rate": 4.996580027359781e-05, |
|
"loss": 0.6562, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 1.0259917920656634, |
|
"grad_norm": 26.281583786010742, |
|
"learning_rate": 4.9870801033591734e-05, |
|
"loss": 0.7318, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.043091655266758, |
|
"grad_norm": 26.66839599609375, |
|
"learning_rate": 4.977580179358565e-05, |
|
"loss": 0.803, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 1.0601915184678523, |
|
"grad_norm": 10.613127708435059, |
|
"learning_rate": 4.9680802553579575e-05, |
|
"loss": 0.536, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.0772913816689467, |
|
"grad_norm": 13.497079849243164, |
|
"learning_rate": 4.958580331357349e-05, |
|
"loss": 0.6842, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 1.094391244870041, |
|
"grad_norm": 9.89592170715332, |
|
"learning_rate": 4.9490804073567415e-05, |
|
"loss": 0.6305, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.1114911080711354, |
|
"grad_norm": 16.67163848876953, |
|
"learning_rate": 4.939580483356133e-05, |
|
"loss": 0.7628, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 1.1285909712722297, |
|
"grad_norm": 42.5455207824707, |
|
"learning_rate": 4.9300805593555256e-05, |
|
"loss": 0.6883, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.1456908344733243, |
|
"grad_norm": 10.086162567138672, |
|
"learning_rate": 4.920580635354917e-05, |
|
"loss": 0.6851, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 1.1627906976744187, |
|
"grad_norm": 15.008639335632324, |
|
"learning_rate": 4.9110807113543096e-05, |
|
"loss": 0.7015, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.179890560875513, |
|
"grad_norm": 36.36772155761719, |
|
"learning_rate": 4.901580787353701e-05, |
|
"loss": 0.7014, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 1.1969904240766074, |
|
"grad_norm": 24.153322219848633, |
|
"learning_rate": 4.892080863353094e-05, |
|
"loss": 0.6344, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.2140902872777017, |
|
"grad_norm": 14.07002067565918, |
|
"learning_rate": 4.8825809393524854e-05, |
|
"loss": 0.7835, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 1.231190150478796, |
|
"grad_norm": 7.812533378601074, |
|
"learning_rate": 4.873081015351878e-05, |
|
"loss": 0.5902, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.2482900136798905, |
|
"grad_norm": 16.708251953125, |
|
"learning_rate": 4.8635810913512694e-05, |
|
"loss": 0.6682, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 1.265389876880985, |
|
"grad_norm": 62.408294677734375, |
|
"learning_rate": 4.854081167350661e-05, |
|
"loss": 0.7275, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.2824897400820794, |
|
"grad_norm": 11.449152946472168, |
|
"learning_rate": 4.8445812433500535e-05, |
|
"loss": 0.7308, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 1.2995896032831737, |
|
"grad_norm": 6.670589447021484, |
|
"learning_rate": 4.835081319349445e-05, |
|
"loss": 0.6819, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.316689466484268, |
|
"grad_norm": 14.783951759338379, |
|
"learning_rate": 4.8255813953488375e-05, |
|
"loss": 0.7014, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 1.3337893296853625, |
|
"grad_norm": 11.176630973815918, |
|
"learning_rate": 4.816081471348229e-05, |
|
"loss": 0.7157, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 1.350889192886457, |
|
"grad_norm": 14.224772453308105, |
|
"learning_rate": 4.8065815473476216e-05, |
|
"loss": 0.58, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 1.3679890560875512, |
|
"grad_norm": 35.03193283081055, |
|
"learning_rate": 4.797081623347013e-05, |
|
"loss": 0.7904, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.3850889192886457, |
|
"grad_norm": 47.37995147705078, |
|
"learning_rate": 4.7875816993464056e-05, |
|
"loss": 0.7543, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 1.40218878248974, |
|
"grad_norm": 13.47080135345459, |
|
"learning_rate": 4.778081775345797e-05, |
|
"loss": 0.8038, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 1.4192886456908345, |
|
"grad_norm": 5.6258158683776855, |
|
"learning_rate": 4.76858185134519e-05, |
|
"loss": 0.7249, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 1.4363885088919288, |
|
"grad_norm": 8.781723976135254, |
|
"learning_rate": 4.7590819273445814e-05, |
|
"loss": 0.5521, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.4534883720930232, |
|
"grad_norm": 7.260859966278076, |
|
"learning_rate": 4.749582003343974e-05, |
|
"loss": 0.5422, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 1.4705882352941178, |
|
"grad_norm": 9.566021919250488, |
|
"learning_rate": 4.7400820793433654e-05, |
|
"loss": 0.7651, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 1.487688098495212, |
|
"grad_norm": 26.22560691833496, |
|
"learning_rate": 4.730582155342758e-05, |
|
"loss": 0.6579, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 1.5047879616963065, |
|
"grad_norm": 7.469398498535156, |
|
"learning_rate": 4.7210822313421495e-05, |
|
"loss": 0.7464, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.5218878248974008, |
|
"grad_norm": 12.48919677734375, |
|
"learning_rate": 4.711582307341542e-05, |
|
"loss": 0.6315, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 1.5389876880984952, |
|
"grad_norm": 7.278232097625732, |
|
"learning_rate": 4.7020823833409335e-05, |
|
"loss": 0.5035, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.5560875512995898, |
|
"grad_norm": 20.841951370239258, |
|
"learning_rate": 4.692582459340326e-05, |
|
"loss": 0.7368, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 1.573187414500684, |
|
"grad_norm": 19.840713500976562, |
|
"learning_rate": 4.6830825353397176e-05, |
|
"loss": 0.7734, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.5902872777017785, |
|
"grad_norm": 20.479629516601562, |
|
"learning_rate": 4.673582611339109e-05, |
|
"loss": 0.6132, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 1.6073871409028728, |
|
"grad_norm": 18.495811462402344, |
|
"learning_rate": 4.664082687338501e-05, |
|
"loss": 0.5594, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.6244870041039672, |
|
"grad_norm": 8.165420532226562, |
|
"learning_rate": 4.6545827633378933e-05, |
|
"loss": 0.6215, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 1.6415868673050615, |
|
"grad_norm": 9.201417922973633, |
|
"learning_rate": 4.645082839337285e-05, |
|
"loss": 0.5739, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.658686730506156, |
|
"grad_norm": 27.07282829284668, |
|
"learning_rate": 4.6355829153366774e-05, |
|
"loss": 0.6288, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 1.6757865937072505, |
|
"grad_norm": 7.3830976486206055, |
|
"learning_rate": 4.626082991336069e-05, |
|
"loss": 0.5158, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.6928864569083446, |
|
"grad_norm": 14.039732933044434, |
|
"learning_rate": 4.6165830673354615e-05, |
|
"loss": 0.6536, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 1.7099863201094392, |
|
"grad_norm": 31.472610473632812, |
|
"learning_rate": 4.607083143334854e-05, |
|
"loss": 0.6594, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.7270861833105335, |
|
"grad_norm": 13.184996604919434, |
|
"learning_rate": 4.5975832193342455e-05, |
|
"loss": 0.6299, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 1.744186046511628, |
|
"grad_norm": 5.3301286697387695, |
|
"learning_rate": 4.588083295333638e-05, |
|
"loss": 0.5745, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 1.7612859097127223, |
|
"grad_norm": 5.333646774291992, |
|
"learning_rate": 4.5785833713330296e-05, |
|
"loss": 0.5694, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 1.7783857729138166, |
|
"grad_norm": 22.24896240234375, |
|
"learning_rate": 4.569083447332422e-05, |
|
"loss": 0.5209, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.7954856361149112, |
|
"grad_norm": 10.656512260437012, |
|
"learning_rate": 4.5595835233318136e-05, |
|
"loss": 0.5069, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 1.8125854993160053, |
|
"grad_norm": 14.687119483947754, |
|
"learning_rate": 4.550083599331206e-05, |
|
"loss": 0.5486, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 1.8296853625171, |
|
"grad_norm": 22.30952262878418, |
|
"learning_rate": 4.540583675330598e-05, |
|
"loss": 0.6733, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 1.8467852257181943, |
|
"grad_norm": 9.407474517822266, |
|
"learning_rate": 4.53108375132999e-05, |
|
"loss": 0.4365, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.8638850889192886, |
|
"grad_norm": 9.207115173339844, |
|
"learning_rate": 4.521583827329382e-05, |
|
"loss": 0.6675, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 1.8809849521203832, |
|
"grad_norm": 22.828750610351562, |
|
"learning_rate": 4.5120839033287734e-05, |
|
"loss": 0.5888, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.8980848153214773, |
|
"grad_norm": 6.441003322601318, |
|
"learning_rate": 4.502583979328165e-05, |
|
"loss": 0.4317, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 1.915184678522572, |
|
"grad_norm": 7.692673683166504, |
|
"learning_rate": 4.4930840553275575e-05, |
|
"loss": 0.622, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.9322845417236663, |
|
"grad_norm": 11.904592514038086, |
|
"learning_rate": 4.483584131326949e-05, |
|
"loss": 0.5188, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 1.9493844049247606, |
|
"grad_norm": 7.662288188934326, |
|
"learning_rate": 4.4740842073263415e-05, |
|
"loss": 0.7301, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 1.966484268125855, |
|
"grad_norm": 14.738003730773926, |
|
"learning_rate": 4.464584283325733e-05, |
|
"loss": 0.6253, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 1.9835841313269493, |
|
"grad_norm": 19.53965950012207, |
|
"learning_rate": 4.4550843593251256e-05, |
|
"loss": 0.5386, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8098327074086719, |
|
"eval_f1_macro": 0.5180108004626454, |
|
"eval_f1_micro": 0.8098327074086719, |
|
"eval_f1_weighted": 0.8111086454170916, |
|
"eval_loss": 0.5644355416297913, |
|
"eval_precision_macro": 0.703611295965861, |
|
"eval_precision_micro": 0.8098327074086719, |
|
"eval_precision_weighted": 0.8679013857559181, |
|
"eval_recall_macro": 0.5354131711427872, |
|
"eval_recall_micro": 0.8098327074086719, |
|
"eval_recall_weighted": 0.8098327074086719, |
|
"eval_runtime": 19.2316, |
|
"eval_samples_per_second": 152.302, |
|
"eval_steps_per_second": 9.568, |
|
"step": 2924 |
|
}, |
|
{ |
|
"epoch": 2.000683994528044, |
|
"grad_norm": 21.892972946166992, |
|
"learning_rate": 4.445584435324517e-05, |
|
"loss": 0.6068, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 2.017783857729138, |
|
"grad_norm": 21.534278869628906, |
|
"learning_rate": 4.4360845113239096e-05, |
|
"loss": 0.5734, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 2.0348837209302326, |
|
"grad_norm": 5.837319850921631, |
|
"learning_rate": 4.426584587323301e-05, |
|
"loss": 0.6038, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 2.0519835841313268, |
|
"grad_norm": 9.711400985717773, |
|
"learning_rate": 4.417084663322694e-05, |
|
"loss": 0.5024, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.0690834473324213, |
|
"grad_norm": 6.591183185577393, |
|
"learning_rate": 4.4075847393220854e-05, |
|
"loss": 0.6356, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 2.086183310533516, |
|
"grad_norm": 6.523811340332031, |
|
"learning_rate": 4.398084815321478e-05, |
|
"loss": 0.6886, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 2.10328317373461, |
|
"grad_norm": 14.16163158416748, |
|
"learning_rate": 4.38858489132087e-05, |
|
"loss": 0.4388, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 2.1203830369357046, |
|
"grad_norm": 17.01984214782715, |
|
"learning_rate": 4.379084967320262e-05, |
|
"loss": 0.5274, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 2.1374829001367988, |
|
"grad_norm": 14.736923217773438, |
|
"learning_rate": 4.369585043319654e-05, |
|
"loss": 0.5568, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 2.1545827633378933, |
|
"grad_norm": 10.865409851074219, |
|
"learning_rate": 4.360085119319046e-05, |
|
"loss": 0.5113, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 2.1716826265389875, |
|
"grad_norm": 22.429012298583984, |
|
"learning_rate": 4.3505851953184376e-05, |
|
"loss": 0.6578, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 2.188782489740082, |
|
"grad_norm": 24.677793502807617, |
|
"learning_rate": 4.34108527131783e-05, |
|
"loss": 0.7861, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 2.2058823529411766, |
|
"grad_norm": 10.499431610107422, |
|
"learning_rate": 4.3315853473172216e-05, |
|
"loss": 0.5801, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 2.2229822161422708, |
|
"grad_norm": 3.640758514404297, |
|
"learning_rate": 4.322085423316613e-05, |
|
"loss": 0.6225, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 2.2400820793433653, |
|
"grad_norm": 2.9993128776550293, |
|
"learning_rate": 4.312585499316006e-05, |
|
"loss": 0.5234, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 2.2571819425444595, |
|
"grad_norm": 31.736900329589844, |
|
"learning_rate": 4.3030855753153974e-05, |
|
"loss": 0.6043, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 2.274281805745554, |
|
"grad_norm": 24.770673751831055, |
|
"learning_rate": 4.29358565131479e-05, |
|
"loss": 0.468, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 2.2913816689466486, |
|
"grad_norm": 16.497438430786133, |
|
"learning_rate": 4.2840857273141814e-05, |
|
"loss": 0.6247, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 2.3084815321477428, |
|
"grad_norm": 11.79759693145752, |
|
"learning_rate": 4.274585803313574e-05, |
|
"loss": 0.4275, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 2.3255813953488373, |
|
"grad_norm": 7.504731178283691, |
|
"learning_rate": 4.2650858793129655e-05, |
|
"loss": 0.5987, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 2.3426812585499315, |
|
"grad_norm": 9.874656677246094, |
|
"learning_rate": 4.2559659522723825e-05, |
|
"loss": 0.3649, |
|
"step": 3425 |
|
}, |
|
{ |
|
"epoch": 2.359781121751026, |
|
"grad_norm": 4.051993370056152, |
|
"learning_rate": 4.246466028271774e-05, |
|
"loss": 0.6407, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 2.37688098495212, |
|
"grad_norm": 25.524431228637695, |
|
"learning_rate": 4.236966104271166e-05, |
|
"loss": 0.6522, |
|
"step": 3475 |
|
}, |
|
{ |
|
"epoch": 2.3939808481532148, |
|
"grad_norm": 10.878904342651367, |
|
"learning_rate": 4.2274661802705576e-05, |
|
"loss": 0.568, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.4110807113543093, |
|
"grad_norm": 26.06123924255371, |
|
"learning_rate": 4.21796625626995e-05, |
|
"loss": 0.4338, |
|
"step": 3525 |
|
}, |
|
{ |
|
"epoch": 2.4281805745554035, |
|
"grad_norm": 9.708687782287598, |
|
"learning_rate": 4.2084663322693416e-05, |
|
"loss": 0.5463, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 2.445280437756498, |
|
"grad_norm": 25.002485275268555, |
|
"learning_rate": 4.198966408268734e-05, |
|
"loss": 0.4026, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 2.462380300957592, |
|
"grad_norm": 27.914440155029297, |
|
"learning_rate": 4.189466484268126e-05, |
|
"loss": 0.4373, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 2.4794801641586868, |
|
"grad_norm": 16.424388885498047, |
|
"learning_rate": 4.179966560267518e-05, |
|
"loss": 0.6144, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 2.496580027359781, |
|
"grad_norm": 18.099689483642578, |
|
"learning_rate": 4.17046663626691e-05, |
|
"loss": 0.4678, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 2.5136798905608755, |
|
"grad_norm": 7.258431434631348, |
|
"learning_rate": 4.160966712266302e-05, |
|
"loss": 0.5745, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 2.53077975376197, |
|
"grad_norm": 12.761260986328125, |
|
"learning_rate": 4.151466788265694e-05, |
|
"loss": 0.564, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 2.547879616963064, |
|
"grad_norm": 10.828967094421387, |
|
"learning_rate": 4.141966864265086e-05, |
|
"loss": 0.5247, |
|
"step": 3725 |
|
}, |
|
{ |
|
"epoch": 2.5649794801641588, |
|
"grad_norm": 8.467166900634766, |
|
"learning_rate": 4.1324669402644785e-05, |
|
"loss": 0.6447, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 2.582079343365253, |
|
"grad_norm": 5.6609883308410645, |
|
"learning_rate": 4.12296701626387e-05, |
|
"loss": 0.4998, |
|
"step": 3775 |
|
}, |
|
{ |
|
"epoch": 2.5991792065663475, |
|
"grad_norm": 8.889337539672852, |
|
"learning_rate": 4.1134670922632626e-05, |
|
"loss": 0.6064, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 2.616279069767442, |
|
"grad_norm": 10.798035621643066, |
|
"learning_rate": 4.103967168262654e-05, |
|
"loss": 0.4447, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 2.633378932968536, |
|
"grad_norm": 5.452834129333496, |
|
"learning_rate": 4.0944672442620466e-05, |
|
"loss": 0.5188, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 2.650478796169631, |
|
"grad_norm": 21.596166610717773, |
|
"learning_rate": 4.084967320261438e-05, |
|
"loss": 0.486, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 2.667578659370725, |
|
"grad_norm": 27.14288330078125, |
|
"learning_rate": 4.07546739626083e-05, |
|
"loss": 0.586, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 2.6846785225718195, |
|
"grad_norm": 6.16865873336792, |
|
"learning_rate": 4.065967472260222e-05, |
|
"loss": 0.5886, |
|
"step": 3925 |
|
}, |
|
{ |
|
"epoch": 2.701778385772914, |
|
"grad_norm": 10.905616760253906, |
|
"learning_rate": 4.056467548259614e-05, |
|
"loss": 0.5854, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 2.718878248974008, |
|
"grad_norm": 23.615705490112305, |
|
"learning_rate": 4.046967624259006e-05, |
|
"loss": 0.5838, |
|
"step": 3975 |
|
}, |
|
{ |
|
"epoch": 2.7359781121751023, |
|
"grad_norm": 20.192031860351562, |
|
"learning_rate": 4.037467700258398e-05, |
|
"loss": 0.4683, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.753077975376197, |
|
"grad_norm": 4.011788845062256, |
|
"learning_rate": 4.02796777625779e-05, |
|
"loss": 0.4985, |
|
"step": 4025 |
|
}, |
|
{ |
|
"epoch": 2.7701778385772915, |
|
"grad_norm": 4.153777122497559, |
|
"learning_rate": 4.018467852257182e-05, |
|
"loss": 0.654, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 2.7872777017783856, |
|
"grad_norm": 7.651889324188232, |
|
"learning_rate": 4.008967928256574e-05, |
|
"loss": 0.5508, |
|
"step": 4075 |
|
}, |
|
{ |
|
"epoch": 2.80437756497948, |
|
"grad_norm": 4.187475681304932, |
|
"learning_rate": 3.999468004255966e-05, |
|
"loss": 0.5831, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 2.8214774281805743, |
|
"grad_norm": 15.8203763961792, |
|
"learning_rate": 3.989968080255358e-05, |
|
"loss": 0.5752, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 2.838577291381669, |
|
"grad_norm": 3.2058730125427246, |
|
"learning_rate": 3.98046815625475e-05, |
|
"loss": 0.5395, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 2.8556771545827635, |
|
"grad_norm": 20.272655487060547, |
|
"learning_rate": 3.970968232254142e-05, |
|
"loss": 0.6421, |
|
"step": 4175 |
|
}, |
|
{ |
|
"epoch": 2.8727770177838576, |
|
"grad_norm": 12.041251182556152, |
|
"learning_rate": 3.961468308253534e-05, |
|
"loss": 0.5199, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 2.889876880984952, |
|
"grad_norm": 8.457215309143066, |
|
"learning_rate": 3.951968384252926e-05, |
|
"loss": 0.3992, |
|
"step": 4225 |
|
}, |
|
{ |
|
"epoch": 2.9069767441860463, |
|
"grad_norm": 7.0436787605285645, |
|
"learning_rate": 3.9424684602523184e-05, |
|
"loss": 0.4758, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 2.924076607387141, |
|
"grad_norm": 14.91028881072998, |
|
"learning_rate": 3.93296853625171e-05, |
|
"loss": 0.565, |
|
"step": 4275 |
|
}, |
|
{ |
|
"epoch": 2.9411764705882355, |
|
"grad_norm": 6.1303229331970215, |
|
"learning_rate": 3.9234686122511024e-05, |
|
"loss": 0.4658, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 2.9582763337893296, |
|
"grad_norm": 3.7947280406951904, |
|
"learning_rate": 3.913968688250494e-05, |
|
"loss": 0.4188, |
|
"step": 4325 |
|
}, |
|
{ |
|
"epoch": 2.975376196990424, |
|
"grad_norm": 6.020320415496826, |
|
"learning_rate": 3.9044687642498865e-05, |
|
"loss": 0.5847, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 2.9924760601915183, |
|
"grad_norm": 3.9531519412994385, |
|
"learning_rate": 3.894968840249278e-05, |
|
"loss": 0.5847, |
|
"step": 4375 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9061113007852509, |
|
"eval_f1_macro": 0.7552413664079601, |
|
"eval_f1_micro": 0.9061113007852509, |
|
"eval_f1_weighted": 0.9035670983782715, |
|
"eval_loss": 0.2802155911922455, |
|
"eval_precision_macro": 0.8498221458985794, |
|
"eval_precision_micro": 0.9061113007852509, |
|
"eval_precision_weighted": 0.9095288940815534, |
|
"eval_recall_macro": 0.7266443770545475, |
|
"eval_recall_micro": 0.9061113007852509, |
|
"eval_recall_weighted": 0.9061113007852509, |
|
"eval_runtime": 19.2634, |
|
"eval_samples_per_second": 152.05, |
|
"eval_steps_per_second": 9.552, |
|
"step": 4386 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 14620, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"total_flos": 1.1417490381074596e+18, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|