{ "best_metric": 0.2802155911922455, "best_model_checkpoint": "autotrain-swinv2-tiny-patch4-window8-256/checkpoint-4386", "epoch": 3.0, "eval_steps": 500, "global_step": 4386, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01709986320109439, "grad_norm": 16.21043586730957, "learning_rate": 7.86593707250342e-07, "loss": 2.7812, "step": 25 }, { "epoch": 0.03419972640218878, "grad_norm": 21.944915771484375, "learning_rate": 1.6415868673050617e-06, "loss": 2.6916, "step": 50 }, { "epoch": 0.05129958960328317, "grad_norm": 15.82861328125, "learning_rate": 2.496580027359781e-06, "loss": 2.5256, "step": 75 }, { "epoch": 0.06839945280437756, "grad_norm": 28.86319351196289, "learning_rate": 3.3515731874145007e-06, "loss": 2.2838, "step": 100 }, { "epoch": 0.08549931600547196, "grad_norm": 23.919939041137695, "learning_rate": 4.172366621067032e-06, "loss": 1.993, "step": 125 }, { "epoch": 0.10259917920656635, "grad_norm": 22.423189163208008, "learning_rate": 5.027359781121752e-06, "loss": 1.9762, "step": 150 }, { "epoch": 0.11969904240766074, "grad_norm": 18.18659210205078, "learning_rate": 5.882352941176471e-06, "loss": 1.6428, "step": 175 }, { "epoch": 0.13679890560875513, "grad_norm": 25.3078670501709, "learning_rate": 6.73734610123119e-06, "loss": 1.6677, "step": 200 }, { "epoch": 0.1538987688098495, "grad_norm": 33.7442626953125, "learning_rate": 7.592339261285911e-06, "loss": 1.5048, "step": 225 }, { "epoch": 0.17099863201094392, "grad_norm": 44.62141799926758, "learning_rate": 8.44733242134063e-06, "loss": 1.3579, "step": 250 }, { "epoch": 0.1880984952120383, "grad_norm": 47.94766616821289, "learning_rate": 9.26812585499316e-06, "loss": 1.3141, "step": 275 }, { "epoch": 0.2051983584131327, "grad_norm": 50.67353820800781, "learning_rate": 1.0123119015047879e-05, "loss": 1.5654, "step": 300 }, { "epoch": 0.22229822161422708, "grad_norm": 34.55750274658203, "learning_rate": 1.09781121751026e-05, "loss": 1.3736, "step": 325 }, { "epoch": 0.2393980848153215, "grad_norm": 29.00438117980957, "learning_rate": 1.183310533515732e-05, "loss": 1.274, "step": 350 }, { "epoch": 0.25649794801641584, "grad_norm": 24.081707000732422, "learning_rate": 1.2688098495212038e-05, "loss": 1.2503, "step": 375 }, { "epoch": 0.27359781121751026, "grad_norm": 23.609891891479492, "learning_rate": 1.354309165526676e-05, "loss": 1.0883, "step": 400 }, { "epoch": 0.29069767441860467, "grad_norm": 18.259571075439453, "learning_rate": 1.4398084815321477e-05, "loss": 1.1379, "step": 425 }, { "epoch": 0.307797537619699, "grad_norm": 42.96183395385742, "learning_rate": 1.5253077975376198e-05, "loss": 1.2596, "step": 450 }, { "epoch": 0.32489740082079344, "grad_norm": 37.27230453491211, "learning_rate": 1.6108071135430915e-05, "loss": 1.0158, "step": 475 }, { "epoch": 0.34199726402188785, "grad_norm": 113.23546600341797, "learning_rate": 1.6963064295485636e-05, "loss": 1.0687, "step": 500 }, { "epoch": 0.3590971272229822, "grad_norm": 19.023685455322266, "learning_rate": 1.7818057455540357e-05, "loss": 0.9566, "step": 525 }, { "epoch": 0.3761969904240766, "grad_norm": 29.44492530822754, "learning_rate": 1.8673050615595075e-05, "loss": 1.128, "step": 550 }, { "epoch": 0.393296853625171, "grad_norm": 42.041595458984375, "learning_rate": 1.9528043775649796e-05, "loss": 1.0133, "step": 575 }, { "epoch": 0.4103967168262654, "grad_norm": 47.55967712402344, "learning_rate": 2.0383036935704516e-05, "loss": 1.0888, "step": 600 }, { "epoch": 0.4274965800273598, "grad_norm": 12.591029167175293, "learning_rate": 2.1238030095759234e-05, "loss": 0.9936, "step": 625 }, { "epoch": 0.44459644322845415, "grad_norm": 31.012723922729492, "learning_rate": 2.2093023255813955e-05, "loss": 0.9765, "step": 650 }, { "epoch": 0.46169630642954856, "grad_norm": 39.08427047729492, "learning_rate": 2.2948016415868672e-05, "loss": 0.9398, "step": 675 }, { "epoch": 0.478796169630643, "grad_norm": 34.639007568359375, "learning_rate": 2.3803009575923393e-05, "loss": 0.7951, "step": 700 }, { "epoch": 0.49589603283173733, "grad_norm": 40.917171478271484, "learning_rate": 2.4658002735978114e-05, "loss": 0.7592, "step": 725 }, { "epoch": 0.5129958960328317, "grad_norm": 27.42568016052246, "learning_rate": 2.5512995896032832e-05, "loss": 0.8023, "step": 750 }, { "epoch": 0.5300957592339262, "grad_norm": 10.912271499633789, "learning_rate": 2.6367989056087556e-05, "loss": 0.832, "step": 775 }, { "epoch": 0.5471956224350205, "grad_norm": 35.33407211303711, "learning_rate": 2.7222982216142274e-05, "loss": 0.8747, "step": 800 }, { "epoch": 0.5642954856361149, "grad_norm": 23.503469467163086, "learning_rate": 2.807797537619699e-05, "loss": 0.9251, "step": 825 }, { "epoch": 0.5813953488372093, "grad_norm": 28.509077072143555, "learning_rate": 2.893296853625171e-05, "loss": 0.9039, "step": 850 }, { "epoch": 0.5984952120383037, "grad_norm": 20.486900329589844, "learning_rate": 2.9787961696306433e-05, "loss": 0.9576, "step": 875 }, { "epoch": 0.615595075239398, "grad_norm": 26.435588836669922, "learning_rate": 3.064295485636115e-05, "loss": 0.8952, "step": 900 }, { "epoch": 0.6326949384404925, "grad_norm": 25.732032775878906, "learning_rate": 3.149794801641587e-05, "loss": 0.8222, "step": 925 }, { "epoch": 0.6497948016415869, "grad_norm": 33.15768051147461, "learning_rate": 3.235294117647059e-05, "loss": 0.9005, "step": 950 }, { "epoch": 0.6668946648426812, "grad_norm": 35.35673141479492, "learning_rate": 3.3207934336525306e-05, "loss": 1.081, "step": 975 }, { "epoch": 0.6839945280437757, "grad_norm": 46.90694046020508, "learning_rate": 3.406292749658003e-05, "loss": 0.7733, "step": 1000 }, { "epoch": 0.70109439124487, "grad_norm": 26.313335418701172, "learning_rate": 3.491792065663475e-05, "loss": 0.919, "step": 1025 }, { "epoch": 0.7181942544459644, "grad_norm": 44.662662506103516, "learning_rate": 3.577291381668947e-05, "loss": 0.6723, "step": 1050 }, { "epoch": 0.7352941176470589, "grad_norm": 6.602749347686768, "learning_rate": 3.662790697674418e-05, "loss": 0.7601, "step": 1075 }, { "epoch": 0.7523939808481532, "grad_norm": 38.393550872802734, "learning_rate": 3.748290013679891e-05, "loss": 0.748, "step": 1100 }, { "epoch": 0.7694938440492476, "grad_norm": 11.72321891784668, "learning_rate": 3.8337893296853625e-05, "loss": 0.7516, "step": 1125 }, { "epoch": 0.786593707250342, "grad_norm": 11.254487991333008, "learning_rate": 3.9192886456908346e-05, "loss": 0.7186, "step": 1150 }, { "epoch": 0.8036935704514364, "grad_norm": 44.11043930053711, "learning_rate": 4.004787961696307e-05, "loss": 0.7565, "step": 1175 }, { "epoch": 0.8207934336525308, "grad_norm": 11.987720489501953, "learning_rate": 4.090287277701779e-05, "loss": 0.7304, "step": 1200 }, { "epoch": 0.8378932968536251, "grad_norm": 17.10840606689453, "learning_rate": 4.17578659370725e-05, "loss": 0.8912, "step": 1225 }, { "epoch": 0.8549931600547196, "grad_norm": 19.49997901916504, "learning_rate": 4.261285909712722e-05, "loss": 0.9355, "step": 1250 }, { "epoch": 0.872093023255814, "grad_norm": 12.431605339050293, "learning_rate": 4.3467852257181944e-05, "loss": 0.7146, "step": 1275 }, { "epoch": 0.8891928864569083, "grad_norm": 20.540786743164062, "learning_rate": 4.4322845417236665e-05, "loss": 0.8042, "step": 1300 }, { "epoch": 0.9062927496580028, "grad_norm": 28.939634323120117, "learning_rate": 4.517783857729138e-05, "loss": 0.7885, "step": 1325 }, { "epoch": 0.9233926128590971, "grad_norm": 27.024660110473633, "learning_rate": 4.6032831737346106e-05, "loss": 0.7953, "step": 1350 }, { "epoch": 0.9404924760601915, "grad_norm": 47.79359436035156, "learning_rate": 4.688782489740082e-05, "loss": 0.759, "step": 1375 }, { "epoch": 0.957592339261286, "grad_norm": 18.608360290527344, "learning_rate": 4.774281805745554e-05, "loss": 0.7392, "step": 1400 }, { "epoch": 0.9746922024623803, "grad_norm": 16.670150756835938, "learning_rate": 4.859781121751026e-05, "loss": 0.664, "step": 1425 }, { "epoch": 0.9917920656634747, "grad_norm": 21.591880798339844, "learning_rate": 4.945280437756498e-05, "loss": 0.7628, "step": 1450 }, { "epoch": 1.0, "eval_accuracy": 0.7722772277227723, "eval_f1_macro": 0.3963476960209859, "eval_f1_micro": 0.7722772277227723, "eval_f1_weighted": 0.7524459692668548, "eval_loss": 0.6640351414680481, "eval_precision_macro": 0.5662665685743159, "eval_precision_micro": 0.7722772277227723, "eval_precision_weighted": 0.8150598854310834, "eval_recall_macro": 0.4019843036358822, "eval_recall_micro": 0.7722772277227723, "eval_recall_weighted": 0.7722772277227723, "eval_runtime": 19.424, "eval_samples_per_second": 150.793, "eval_steps_per_second": 9.473, "step": 1462 }, { "epoch": 1.008891928864569, "grad_norm": 18.68697738647461, "learning_rate": 4.996580027359781e-05, "loss": 0.6562, "step": 1475 }, { "epoch": 1.0259917920656634, "grad_norm": 26.281583786010742, "learning_rate": 4.9870801033591734e-05, "loss": 0.7318, "step": 1500 }, { "epoch": 1.043091655266758, "grad_norm": 26.66839599609375, "learning_rate": 4.977580179358565e-05, "loss": 0.803, "step": 1525 }, { "epoch": 1.0601915184678523, "grad_norm": 10.613127708435059, "learning_rate": 4.9680802553579575e-05, "loss": 0.536, "step": 1550 }, { "epoch": 1.0772913816689467, "grad_norm": 13.497079849243164, "learning_rate": 4.958580331357349e-05, "loss": 0.6842, "step": 1575 }, { "epoch": 1.094391244870041, "grad_norm": 9.89592170715332, "learning_rate": 4.9490804073567415e-05, "loss": 0.6305, "step": 1600 }, { "epoch": 1.1114911080711354, "grad_norm": 16.67163848876953, "learning_rate": 4.939580483356133e-05, "loss": 0.7628, "step": 1625 }, { "epoch": 1.1285909712722297, "grad_norm": 42.5455207824707, "learning_rate": 4.9300805593555256e-05, "loss": 0.6883, "step": 1650 }, { "epoch": 1.1456908344733243, "grad_norm": 10.086162567138672, "learning_rate": 4.920580635354917e-05, "loss": 0.6851, "step": 1675 }, { "epoch": 1.1627906976744187, "grad_norm": 15.008639335632324, "learning_rate": 4.9110807113543096e-05, "loss": 0.7015, "step": 1700 }, { "epoch": 1.179890560875513, "grad_norm": 36.36772155761719, "learning_rate": 4.901580787353701e-05, "loss": 0.7014, "step": 1725 }, { "epoch": 1.1969904240766074, "grad_norm": 24.153322219848633, "learning_rate": 4.892080863353094e-05, "loss": 0.6344, "step": 1750 }, { "epoch": 1.2140902872777017, "grad_norm": 14.07002067565918, "learning_rate": 4.8825809393524854e-05, "loss": 0.7835, "step": 1775 }, { "epoch": 1.231190150478796, "grad_norm": 7.812533378601074, "learning_rate": 4.873081015351878e-05, "loss": 0.5902, "step": 1800 }, { "epoch": 1.2482900136798905, "grad_norm": 16.708251953125, "learning_rate": 4.8635810913512694e-05, "loss": 0.6682, "step": 1825 }, { "epoch": 1.265389876880985, "grad_norm": 62.408294677734375, "learning_rate": 4.854081167350661e-05, "loss": 0.7275, "step": 1850 }, { "epoch": 1.2824897400820794, "grad_norm": 11.449152946472168, "learning_rate": 4.8445812433500535e-05, "loss": 0.7308, "step": 1875 }, { "epoch": 1.2995896032831737, "grad_norm": 6.670589447021484, "learning_rate": 4.835081319349445e-05, "loss": 0.6819, "step": 1900 }, { "epoch": 1.316689466484268, "grad_norm": 14.783951759338379, "learning_rate": 4.8255813953488375e-05, "loss": 0.7014, "step": 1925 }, { "epoch": 1.3337893296853625, "grad_norm": 11.176630973815918, "learning_rate": 4.816081471348229e-05, "loss": 0.7157, "step": 1950 }, { "epoch": 1.350889192886457, "grad_norm": 14.224772453308105, "learning_rate": 4.8065815473476216e-05, "loss": 0.58, "step": 1975 }, { "epoch": 1.3679890560875512, "grad_norm": 35.03193283081055, "learning_rate": 4.797081623347013e-05, "loss": 0.7904, "step": 2000 }, { "epoch": 1.3850889192886457, "grad_norm": 47.37995147705078, "learning_rate": 4.7875816993464056e-05, "loss": 0.7543, "step": 2025 }, { "epoch": 1.40218878248974, "grad_norm": 13.47080135345459, "learning_rate": 4.778081775345797e-05, "loss": 0.8038, "step": 2050 }, { "epoch": 1.4192886456908345, "grad_norm": 5.6258158683776855, "learning_rate": 4.76858185134519e-05, "loss": 0.7249, "step": 2075 }, { "epoch": 1.4363885088919288, "grad_norm": 8.781723976135254, "learning_rate": 4.7590819273445814e-05, "loss": 0.5521, "step": 2100 }, { "epoch": 1.4534883720930232, "grad_norm": 7.260859966278076, "learning_rate": 4.749582003343974e-05, "loss": 0.5422, "step": 2125 }, { "epoch": 1.4705882352941178, "grad_norm": 9.566021919250488, "learning_rate": 4.7400820793433654e-05, "loss": 0.7651, "step": 2150 }, { "epoch": 1.487688098495212, "grad_norm": 26.22560691833496, "learning_rate": 4.730582155342758e-05, "loss": 0.6579, "step": 2175 }, { "epoch": 1.5047879616963065, "grad_norm": 7.469398498535156, "learning_rate": 4.7210822313421495e-05, "loss": 0.7464, "step": 2200 }, { "epoch": 1.5218878248974008, "grad_norm": 12.48919677734375, "learning_rate": 4.711582307341542e-05, "loss": 0.6315, "step": 2225 }, { "epoch": 1.5389876880984952, "grad_norm": 7.278232097625732, "learning_rate": 4.7020823833409335e-05, "loss": 0.5035, "step": 2250 }, { "epoch": 1.5560875512995898, "grad_norm": 20.841951370239258, "learning_rate": 4.692582459340326e-05, "loss": 0.7368, "step": 2275 }, { "epoch": 1.573187414500684, "grad_norm": 19.840713500976562, "learning_rate": 4.6830825353397176e-05, "loss": 0.7734, "step": 2300 }, { "epoch": 1.5902872777017785, "grad_norm": 20.479629516601562, "learning_rate": 4.673582611339109e-05, "loss": 0.6132, "step": 2325 }, { "epoch": 1.6073871409028728, "grad_norm": 18.495811462402344, "learning_rate": 4.664082687338501e-05, "loss": 0.5594, "step": 2350 }, { "epoch": 1.6244870041039672, "grad_norm": 8.165420532226562, "learning_rate": 4.6545827633378933e-05, "loss": 0.6215, "step": 2375 }, { "epoch": 1.6415868673050615, "grad_norm": 9.201417922973633, "learning_rate": 4.645082839337285e-05, "loss": 0.5739, "step": 2400 }, { "epoch": 1.658686730506156, "grad_norm": 27.07282829284668, "learning_rate": 4.6355829153366774e-05, "loss": 0.6288, "step": 2425 }, { "epoch": 1.6757865937072505, "grad_norm": 7.3830976486206055, "learning_rate": 4.626082991336069e-05, "loss": 0.5158, "step": 2450 }, { "epoch": 1.6928864569083446, "grad_norm": 14.039732933044434, "learning_rate": 4.6165830673354615e-05, "loss": 0.6536, "step": 2475 }, { "epoch": 1.7099863201094392, "grad_norm": 31.472610473632812, "learning_rate": 4.607083143334854e-05, "loss": 0.6594, "step": 2500 }, { "epoch": 1.7270861833105335, "grad_norm": 13.184996604919434, "learning_rate": 4.5975832193342455e-05, "loss": 0.6299, "step": 2525 }, { "epoch": 1.744186046511628, "grad_norm": 5.3301286697387695, "learning_rate": 4.588083295333638e-05, "loss": 0.5745, "step": 2550 }, { "epoch": 1.7612859097127223, "grad_norm": 5.333646774291992, "learning_rate": 4.5785833713330296e-05, "loss": 0.5694, "step": 2575 }, { "epoch": 1.7783857729138166, "grad_norm": 22.24896240234375, "learning_rate": 4.569083447332422e-05, "loss": 0.5209, "step": 2600 }, { "epoch": 1.7954856361149112, "grad_norm": 10.656512260437012, "learning_rate": 4.5595835233318136e-05, "loss": 0.5069, "step": 2625 }, { "epoch": 1.8125854993160053, "grad_norm": 14.687119483947754, "learning_rate": 4.550083599331206e-05, "loss": 0.5486, "step": 2650 }, { "epoch": 1.8296853625171, "grad_norm": 22.30952262878418, "learning_rate": 4.540583675330598e-05, "loss": 0.6733, "step": 2675 }, { "epoch": 1.8467852257181943, "grad_norm": 9.407474517822266, "learning_rate": 4.53108375132999e-05, "loss": 0.4365, "step": 2700 }, { "epoch": 1.8638850889192886, "grad_norm": 9.207115173339844, "learning_rate": 4.521583827329382e-05, "loss": 0.6675, "step": 2725 }, { "epoch": 1.8809849521203832, "grad_norm": 22.828750610351562, "learning_rate": 4.5120839033287734e-05, "loss": 0.5888, "step": 2750 }, { "epoch": 1.8980848153214773, "grad_norm": 6.441003322601318, "learning_rate": 4.502583979328165e-05, "loss": 0.4317, "step": 2775 }, { "epoch": 1.915184678522572, "grad_norm": 7.692673683166504, "learning_rate": 4.4930840553275575e-05, "loss": 0.622, "step": 2800 }, { "epoch": 1.9322845417236663, "grad_norm": 11.904592514038086, "learning_rate": 4.483584131326949e-05, "loss": 0.5188, "step": 2825 }, { "epoch": 1.9493844049247606, "grad_norm": 7.662288188934326, "learning_rate": 4.4740842073263415e-05, "loss": 0.7301, "step": 2850 }, { "epoch": 1.966484268125855, "grad_norm": 14.738003730773926, "learning_rate": 4.464584283325733e-05, "loss": 0.6253, "step": 2875 }, { "epoch": 1.9835841313269493, "grad_norm": 19.53965950012207, "learning_rate": 4.4550843593251256e-05, "loss": 0.5386, "step": 2900 }, { "epoch": 2.0, "eval_accuracy": 0.8098327074086719, "eval_f1_macro": 0.5180108004626454, "eval_f1_micro": 0.8098327074086719, "eval_f1_weighted": 0.8111086454170916, "eval_loss": 0.5644355416297913, "eval_precision_macro": 0.703611295965861, "eval_precision_micro": 0.8098327074086719, "eval_precision_weighted": 0.8679013857559181, "eval_recall_macro": 0.5354131711427872, "eval_recall_micro": 0.8098327074086719, "eval_recall_weighted": 0.8098327074086719, "eval_runtime": 19.2316, "eval_samples_per_second": 152.302, "eval_steps_per_second": 9.568, "step": 2924 }, { "epoch": 2.000683994528044, "grad_norm": 21.892972946166992, "learning_rate": 4.445584435324517e-05, "loss": 0.6068, "step": 2925 }, { "epoch": 2.017783857729138, "grad_norm": 21.534278869628906, "learning_rate": 4.4360845113239096e-05, "loss": 0.5734, "step": 2950 }, { "epoch": 2.0348837209302326, "grad_norm": 5.837319850921631, "learning_rate": 4.426584587323301e-05, "loss": 0.6038, "step": 2975 }, { "epoch": 2.0519835841313268, "grad_norm": 9.711400985717773, "learning_rate": 4.417084663322694e-05, "loss": 0.5024, "step": 3000 }, { "epoch": 2.0690834473324213, "grad_norm": 6.591183185577393, "learning_rate": 4.4075847393220854e-05, "loss": 0.6356, "step": 3025 }, { "epoch": 2.086183310533516, "grad_norm": 6.523811340332031, "learning_rate": 4.398084815321478e-05, "loss": 0.6886, "step": 3050 }, { "epoch": 2.10328317373461, "grad_norm": 14.16163158416748, "learning_rate": 4.38858489132087e-05, "loss": 0.4388, "step": 3075 }, { "epoch": 2.1203830369357046, "grad_norm": 17.01984214782715, "learning_rate": 4.379084967320262e-05, "loss": 0.5274, "step": 3100 }, { "epoch": 2.1374829001367988, "grad_norm": 14.736923217773438, "learning_rate": 4.369585043319654e-05, "loss": 0.5568, "step": 3125 }, { "epoch": 2.1545827633378933, "grad_norm": 10.865409851074219, "learning_rate": 4.360085119319046e-05, "loss": 0.5113, "step": 3150 }, { "epoch": 2.1716826265389875, "grad_norm": 22.429012298583984, "learning_rate": 4.3505851953184376e-05, "loss": 0.6578, "step": 3175 }, { "epoch": 2.188782489740082, "grad_norm": 24.677793502807617, "learning_rate": 4.34108527131783e-05, "loss": 0.7861, "step": 3200 }, { "epoch": 2.2058823529411766, "grad_norm": 10.499431610107422, "learning_rate": 4.3315853473172216e-05, "loss": 0.5801, "step": 3225 }, { "epoch": 2.2229822161422708, "grad_norm": 3.640758514404297, "learning_rate": 4.322085423316613e-05, "loss": 0.6225, "step": 3250 }, { "epoch": 2.2400820793433653, "grad_norm": 2.9993128776550293, "learning_rate": 4.312585499316006e-05, "loss": 0.5234, "step": 3275 }, { "epoch": 2.2571819425444595, "grad_norm": 31.736900329589844, "learning_rate": 4.3030855753153974e-05, "loss": 0.6043, "step": 3300 }, { "epoch": 2.274281805745554, "grad_norm": 24.770673751831055, "learning_rate": 4.29358565131479e-05, "loss": 0.468, "step": 3325 }, { "epoch": 2.2913816689466486, "grad_norm": 16.497438430786133, "learning_rate": 4.2840857273141814e-05, "loss": 0.6247, "step": 3350 }, { "epoch": 2.3084815321477428, "grad_norm": 11.79759693145752, "learning_rate": 4.274585803313574e-05, "loss": 0.4275, "step": 3375 }, { "epoch": 2.3255813953488373, "grad_norm": 7.504731178283691, "learning_rate": 4.2650858793129655e-05, "loss": 0.5987, "step": 3400 }, { "epoch": 2.3426812585499315, "grad_norm": 9.874656677246094, "learning_rate": 4.2559659522723825e-05, "loss": 0.3649, "step": 3425 }, { "epoch": 2.359781121751026, "grad_norm": 4.051993370056152, "learning_rate": 4.246466028271774e-05, "loss": 0.6407, "step": 3450 }, { "epoch": 2.37688098495212, "grad_norm": 25.524431228637695, "learning_rate": 4.236966104271166e-05, "loss": 0.6522, "step": 3475 }, { "epoch": 2.3939808481532148, "grad_norm": 10.878904342651367, "learning_rate": 4.2274661802705576e-05, "loss": 0.568, "step": 3500 }, { "epoch": 2.4110807113543093, "grad_norm": 26.06123924255371, "learning_rate": 4.21796625626995e-05, "loss": 0.4338, "step": 3525 }, { "epoch": 2.4281805745554035, "grad_norm": 9.708687782287598, "learning_rate": 4.2084663322693416e-05, "loss": 0.5463, "step": 3550 }, { "epoch": 2.445280437756498, "grad_norm": 25.002485275268555, "learning_rate": 4.198966408268734e-05, "loss": 0.4026, "step": 3575 }, { "epoch": 2.462380300957592, "grad_norm": 27.914440155029297, "learning_rate": 4.189466484268126e-05, "loss": 0.4373, "step": 3600 }, { "epoch": 2.4794801641586868, "grad_norm": 16.424388885498047, "learning_rate": 4.179966560267518e-05, "loss": 0.6144, "step": 3625 }, { "epoch": 2.496580027359781, "grad_norm": 18.099689483642578, "learning_rate": 4.17046663626691e-05, "loss": 0.4678, "step": 3650 }, { "epoch": 2.5136798905608755, "grad_norm": 7.258431434631348, "learning_rate": 4.160966712266302e-05, "loss": 0.5745, "step": 3675 }, { "epoch": 2.53077975376197, "grad_norm": 12.761260986328125, "learning_rate": 4.151466788265694e-05, "loss": 0.564, "step": 3700 }, { "epoch": 2.547879616963064, "grad_norm": 10.828967094421387, "learning_rate": 4.141966864265086e-05, "loss": 0.5247, "step": 3725 }, { "epoch": 2.5649794801641588, "grad_norm": 8.467166900634766, "learning_rate": 4.1324669402644785e-05, "loss": 0.6447, "step": 3750 }, { "epoch": 2.582079343365253, "grad_norm": 5.6609883308410645, "learning_rate": 4.12296701626387e-05, "loss": 0.4998, "step": 3775 }, { "epoch": 2.5991792065663475, "grad_norm": 8.889337539672852, "learning_rate": 4.1134670922632626e-05, "loss": 0.6064, "step": 3800 }, { "epoch": 2.616279069767442, "grad_norm": 10.798035621643066, "learning_rate": 4.103967168262654e-05, "loss": 0.4447, "step": 3825 }, { "epoch": 2.633378932968536, "grad_norm": 5.452834129333496, "learning_rate": 4.0944672442620466e-05, "loss": 0.5188, "step": 3850 }, { "epoch": 2.650478796169631, "grad_norm": 21.596166610717773, "learning_rate": 4.084967320261438e-05, "loss": 0.486, "step": 3875 }, { "epoch": 2.667578659370725, "grad_norm": 27.14288330078125, "learning_rate": 4.07546739626083e-05, "loss": 0.586, "step": 3900 }, { "epoch": 2.6846785225718195, "grad_norm": 6.16865873336792, "learning_rate": 4.065967472260222e-05, "loss": 0.5886, "step": 3925 }, { "epoch": 2.701778385772914, "grad_norm": 10.905616760253906, "learning_rate": 4.056467548259614e-05, "loss": 0.5854, "step": 3950 }, { "epoch": 2.718878248974008, "grad_norm": 23.615705490112305, "learning_rate": 4.046967624259006e-05, "loss": 0.5838, "step": 3975 }, { "epoch": 2.7359781121751023, "grad_norm": 20.192031860351562, "learning_rate": 4.037467700258398e-05, "loss": 0.4683, "step": 4000 }, { "epoch": 2.753077975376197, "grad_norm": 4.011788845062256, "learning_rate": 4.02796777625779e-05, "loss": 0.4985, "step": 4025 }, { "epoch": 2.7701778385772915, "grad_norm": 4.153777122497559, "learning_rate": 4.018467852257182e-05, "loss": 0.654, "step": 4050 }, { "epoch": 2.7872777017783856, "grad_norm": 7.651889324188232, "learning_rate": 4.008967928256574e-05, "loss": 0.5508, "step": 4075 }, { "epoch": 2.80437756497948, "grad_norm": 4.187475681304932, "learning_rate": 3.999468004255966e-05, "loss": 0.5831, "step": 4100 }, { "epoch": 2.8214774281805743, "grad_norm": 15.8203763961792, "learning_rate": 3.989968080255358e-05, "loss": 0.5752, "step": 4125 }, { "epoch": 2.838577291381669, "grad_norm": 3.2058730125427246, "learning_rate": 3.98046815625475e-05, "loss": 0.5395, "step": 4150 }, { "epoch": 2.8556771545827635, "grad_norm": 20.272655487060547, "learning_rate": 3.970968232254142e-05, "loss": 0.6421, "step": 4175 }, { "epoch": 2.8727770177838576, "grad_norm": 12.041251182556152, "learning_rate": 3.961468308253534e-05, "loss": 0.5199, "step": 4200 }, { "epoch": 2.889876880984952, "grad_norm": 8.457215309143066, "learning_rate": 3.951968384252926e-05, "loss": 0.3992, "step": 4225 }, { "epoch": 2.9069767441860463, "grad_norm": 7.0436787605285645, "learning_rate": 3.9424684602523184e-05, "loss": 0.4758, "step": 4250 }, { "epoch": 2.924076607387141, "grad_norm": 14.91028881072998, "learning_rate": 3.93296853625171e-05, "loss": 0.565, "step": 4275 }, { "epoch": 2.9411764705882355, "grad_norm": 6.1303229331970215, "learning_rate": 3.9234686122511024e-05, "loss": 0.4658, "step": 4300 }, { "epoch": 2.9582763337893296, "grad_norm": 3.7947280406951904, "learning_rate": 3.913968688250494e-05, "loss": 0.4188, "step": 4325 }, { "epoch": 2.975376196990424, "grad_norm": 6.020320415496826, "learning_rate": 3.9044687642498865e-05, "loss": 0.5847, "step": 4350 }, { "epoch": 2.9924760601915183, "grad_norm": 3.9531519412994385, "learning_rate": 3.894968840249278e-05, "loss": 0.5847, "step": 4375 }, { "epoch": 3.0, "eval_accuracy": 0.9061113007852509, "eval_f1_macro": 0.7552413664079601, "eval_f1_micro": 0.9061113007852509, "eval_f1_weighted": 0.9035670983782715, "eval_loss": 0.2802155911922455, "eval_precision_macro": 0.8498221458985794, "eval_precision_micro": 0.9061113007852509, "eval_precision_weighted": 0.9095288940815534, "eval_recall_macro": 0.7266443770545475, "eval_recall_micro": 0.9061113007852509, "eval_recall_weighted": 0.9061113007852509, "eval_runtime": 19.2634, "eval_samples_per_second": 152.05, "eval_steps_per_second": 9.552, "step": 4386 } ], "logging_steps": 25, "max_steps": 14620, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "total_flos": 1.1417490381074596e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }