{
"best_metric": 0.2802155911922455,
"best_model_checkpoint": "autotrain-swinv2-tiny-patch4-window8-256/checkpoint-4386",
"epoch": 3.0,
"eval_steps": 500,
"global_step": 4386,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01709986320109439,
"grad_norm": 16.21043586730957,
"learning_rate": 7.86593707250342e-07,
"loss": 2.7812,
"step": 25
},
{
"epoch": 0.03419972640218878,
"grad_norm": 21.944915771484375,
"learning_rate": 1.6415868673050617e-06,
"loss": 2.6916,
"step": 50
},
{
"epoch": 0.05129958960328317,
"grad_norm": 15.82861328125,
"learning_rate": 2.496580027359781e-06,
"loss": 2.5256,
"step": 75
},
{
"epoch": 0.06839945280437756,
"grad_norm": 28.86319351196289,
"learning_rate": 3.3515731874145007e-06,
"loss": 2.2838,
"step": 100
},
{
"epoch": 0.08549931600547196,
"grad_norm": 23.919939041137695,
"learning_rate": 4.172366621067032e-06,
"loss": 1.993,
"step": 125
},
{
"epoch": 0.10259917920656635,
"grad_norm": 22.423189163208008,
"learning_rate": 5.027359781121752e-06,
"loss": 1.9762,
"step": 150
},
{
"epoch": 0.11969904240766074,
"grad_norm": 18.18659210205078,
"learning_rate": 5.882352941176471e-06,
"loss": 1.6428,
"step": 175
},
{
"epoch": 0.13679890560875513,
"grad_norm": 25.3078670501709,
"learning_rate": 6.73734610123119e-06,
"loss": 1.6677,
"step": 200
},
{
"epoch": 0.1538987688098495,
"grad_norm": 33.7442626953125,
"learning_rate": 7.592339261285911e-06,
"loss": 1.5048,
"step": 225
},
{
"epoch": 0.17099863201094392,
"grad_norm": 44.62141799926758,
"learning_rate": 8.44733242134063e-06,
"loss": 1.3579,
"step": 250
},
{
"epoch": 0.1880984952120383,
"grad_norm": 47.94766616821289,
"learning_rate": 9.26812585499316e-06,
"loss": 1.3141,
"step": 275
},
{
"epoch": 0.2051983584131327,
"grad_norm": 50.67353820800781,
"learning_rate": 1.0123119015047879e-05,
"loss": 1.5654,
"step": 300
},
{
"epoch": 0.22229822161422708,
"grad_norm": 34.55750274658203,
"learning_rate": 1.09781121751026e-05,
"loss": 1.3736,
"step": 325
},
{
"epoch": 0.2393980848153215,
"grad_norm": 29.00438117980957,
"learning_rate": 1.183310533515732e-05,
"loss": 1.274,
"step": 350
},
{
"epoch": 0.25649794801641584,
"grad_norm": 24.081707000732422,
"learning_rate": 1.2688098495212038e-05,
"loss": 1.2503,
"step": 375
},
{
"epoch": 0.27359781121751026,
"grad_norm": 23.609891891479492,
"learning_rate": 1.354309165526676e-05,
"loss": 1.0883,
"step": 400
},
{
"epoch": 0.29069767441860467,
"grad_norm": 18.259571075439453,
"learning_rate": 1.4398084815321477e-05,
"loss": 1.1379,
"step": 425
},
{
"epoch": 0.307797537619699,
"grad_norm": 42.96183395385742,
"learning_rate": 1.5253077975376198e-05,
"loss": 1.2596,
"step": 450
},
{
"epoch": 0.32489740082079344,
"grad_norm": 37.27230453491211,
"learning_rate": 1.6108071135430915e-05,
"loss": 1.0158,
"step": 475
},
{
"epoch": 0.34199726402188785,
"grad_norm": 113.23546600341797,
"learning_rate": 1.6963064295485636e-05,
"loss": 1.0687,
"step": 500
},
{
"epoch": 0.3590971272229822,
"grad_norm": 19.023685455322266,
"learning_rate": 1.7818057455540357e-05,
"loss": 0.9566,
"step": 525
},
{
"epoch": 0.3761969904240766,
"grad_norm": 29.44492530822754,
"learning_rate": 1.8673050615595075e-05,
"loss": 1.128,
"step": 550
},
{
"epoch": 0.393296853625171,
"grad_norm": 42.041595458984375,
"learning_rate": 1.9528043775649796e-05,
"loss": 1.0133,
"step": 575
},
{
"epoch": 0.4103967168262654,
"grad_norm": 47.55967712402344,
"learning_rate": 2.0383036935704516e-05,
"loss": 1.0888,
"step": 600
},
{
"epoch": 0.4274965800273598,
"grad_norm": 12.591029167175293,
"learning_rate": 2.1238030095759234e-05,
"loss": 0.9936,
"step": 625
},
{
"epoch": 0.44459644322845415,
"grad_norm": 31.012723922729492,
"learning_rate": 2.2093023255813955e-05,
"loss": 0.9765,
"step": 650
},
{
"epoch": 0.46169630642954856,
"grad_norm": 39.08427047729492,
"learning_rate": 2.2948016415868672e-05,
"loss": 0.9398,
"step": 675
},
{
"epoch": 0.478796169630643,
"grad_norm": 34.639007568359375,
"learning_rate": 2.3803009575923393e-05,
"loss": 0.7951,
"step": 700
},
{
"epoch": 0.49589603283173733,
"grad_norm": 40.917171478271484,
"learning_rate": 2.4658002735978114e-05,
"loss": 0.7592,
"step": 725
},
{
"epoch": 0.5129958960328317,
"grad_norm": 27.42568016052246,
"learning_rate": 2.5512995896032832e-05,
"loss": 0.8023,
"step": 750
},
{
"epoch": 0.5300957592339262,
"grad_norm": 10.912271499633789,
"learning_rate": 2.6367989056087556e-05,
"loss": 0.832,
"step": 775
},
{
"epoch": 0.5471956224350205,
"grad_norm": 35.33407211303711,
"learning_rate": 2.7222982216142274e-05,
"loss": 0.8747,
"step": 800
},
{
"epoch": 0.5642954856361149,
"grad_norm": 23.503469467163086,
"learning_rate": 2.807797537619699e-05,
"loss": 0.9251,
"step": 825
},
{
"epoch": 0.5813953488372093,
"grad_norm": 28.509077072143555,
"learning_rate": 2.893296853625171e-05,
"loss": 0.9039,
"step": 850
},
{
"epoch": 0.5984952120383037,
"grad_norm": 20.486900329589844,
"learning_rate": 2.9787961696306433e-05,
"loss": 0.9576,
"step": 875
},
{
"epoch": 0.615595075239398,
"grad_norm": 26.435588836669922,
"learning_rate": 3.064295485636115e-05,
"loss": 0.8952,
"step": 900
},
{
"epoch": 0.6326949384404925,
"grad_norm": 25.732032775878906,
"learning_rate": 3.149794801641587e-05,
"loss": 0.8222,
"step": 925
},
{
"epoch": 0.6497948016415869,
"grad_norm": 33.15768051147461,
"learning_rate": 3.235294117647059e-05,
"loss": 0.9005,
"step": 950
},
{
"epoch": 0.6668946648426812,
"grad_norm": 35.35673141479492,
"learning_rate": 3.3207934336525306e-05,
"loss": 1.081,
"step": 975
},
{
"epoch": 0.6839945280437757,
"grad_norm": 46.90694046020508,
"learning_rate": 3.406292749658003e-05,
"loss": 0.7733,
"step": 1000
},
{
"epoch": 0.70109439124487,
"grad_norm": 26.313335418701172,
"learning_rate": 3.491792065663475e-05,
"loss": 0.919,
"step": 1025
},
{
"epoch": 0.7181942544459644,
"grad_norm": 44.662662506103516,
"learning_rate": 3.577291381668947e-05,
"loss": 0.6723,
"step": 1050
},
{
"epoch": 0.7352941176470589,
"grad_norm": 6.602749347686768,
"learning_rate": 3.662790697674418e-05,
"loss": 0.7601,
"step": 1075
},
{
"epoch": 0.7523939808481532,
"grad_norm": 38.393550872802734,
"learning_rate": 3.748290013679891e-05,
"loss": 0.748,
"step": 1100
},
{
"epoch": 0.7694938440492476,
"grad_norm": 11.72321891784668,
"learning_rate": 3.8337893296853625e-05,
"loss": 0.7516,
"step": 1125
},
{
"epoch": 0.786593707250342,
"grad_norm": 11.254487991333008,
"learning_rate": 3.9192886456908346e-05,
"loss": 0.7186,
"step": 1150
},
{
"epoch": 0.8036935704514364,
"grad_norm": 44.11043930053711,
"learning_rate": 4.004787961696307e-05,
"loss": 0.7565,
"step": 1175
},
{
"epoch": 0.8207934336525308,
"grad_norm": 11.987720489501953,
"learning_rate": 4.090287277701779e-05,
"loss": 0.7304,
"step": 1200
},
{
"epoch": 0.8378932968536251,
"grad_norm": 17.10840606689453,
"learning_rate": 4.17578659370725e-05,
"loss": 0.8912,
"step": 1225
},
{
"epoch": 0.8549931600547196,
"grad_norm": 19.49997901916504,
"learning_rate": 4.261285909712722e-05,
"loss": 0.9355,
"step": 1250
},
{
"epoch": 0.872093023255814,
"grad_norm": 12.431605339050293,
"learning_rate": 4.3467852257181944e-05,
"loss": 0.7146,
"step": 1275
},
{
"epoch": 0.8891928864569083,
"grad_norm": 20.540786743164062,
"learning_rate": 4.4322845417236665e-05,
"loss": 0.8042,
"step": 1300
},
{
"epoch": 0.9062927496580028,
"grad_norm": 28.939634323120117,
"learning_rate": 4.517783857729138e-05,
"loss": 0.7885,
"step": 1325
},
{
"epoch": 0.9233926128590971,
"grad_norm": 27.024660110473633,
"learning_rate": 4.6032831737346106e-05,
"loss": 0.7953,
"step": 1350
},
{
"epoch": 0.9404924760601915,
"grad_norm": 47.79359436035156,
"learning_rate": 4.688782489740082e-05,
"loss": 0.759,
"step": 1375
},
{
"epoch": 0.957592339261286,
"grad_norm": 18.608360290527344,
"learning_rate": 4.774281805745554e-05,
"loss": 0.7392,
"step": 1400
},
{
"epoch": 0.9746922024623803,
"grad_norm": 16.670150756835938,
"learning_rate": 4.859781121751026e-05,
"loss": 0.664,
"step": 1425
},
{
"epoch": 0.9917920656634747,
"grad_norm": 21.591880798339844,
"learning_rate": 4.945280437756498e-05,
"loss": 0.7628,
"step": 1450
},
{
"epoch": 1.0,
"eval_accuracy": 0.7722772277227723,
"eval_f1_macro": 0.3963476960209859,
"eval_f1_micro": 0.7722772277227723,
"eval_f1_weighted": 0.7524459692668548,
"eval_loss": 0.6640351414680481,
"eval_precision_macro": 0.5662665685743159,
"eval_precision_micro": 0.7722772277227723,
"eval_precision_weighted": 0.8150598854310834,
"eval_recall_macro": 0.4019843036358822,
"eval_recall_micro": 0.7722772277227723,
"eval_recall_weighted": 0.7722772277227723,
"eval_runtime": 19.424,
"eval_samples_per_second": 150.793,
"eval_steps_per_second": 9.473,
"step": 1462
},
{
"epoch": 1.008891928864569,
"grad_norm": 18.68697738647461,
"learning_rate": 4.996580027359781e-05,
"loss": 0.6562,
"step": 1475
},
{
"epoch": 1.0259917920656634,
"grad_norm": 26.281583786010742,
"learning_rate": 4.9870801033591734e-05,
"loss": 0.7318,
"step": 1500
},
{
"epoch": 1.043091655266758,
"grad_norm": 26.66839599609375,
"learning_rate": 4.977580179358565e-05,
"loss": 0.803,
"step": 1525
},
{
"epoch": 1.0601915184678523,
"grad_norm": 10.613127708435059,
"learning_rate": 4.9680802553579575e-05,
"loss": 0.536,
"step": 1550
},
{
"epoch": 1.0772913816689467,
"grad_norm": 13.497079849243164,
"learning_rate": 4.958580331357349e-05,
"loss": 0.6842,
"step": 1575
},
{
"epoch": 1.094391244870041,
"grad_norm": 9.89592170715332,
"learning_rate": 4.9490804073567415e-05,
"loss": 0.6305,
"step": 1600
},
{
"epoch": 1.1114911080711354,
"grad_norm": 16.67163848876953,
"learning_rate": 4.939580483356133e-05,
"loss": 0.7628,
"step": 1625
},
{
"epoch": 1.1285909712722297,
"grad_norm": 42.5455207824707,
"learning_rate": 4.9300805593555256e-05,
"loss": 0.6883,
"step": 1650
},
{
"epoch": 1.1456908344733243,
"grad_norm": 10.086162567138672,
"learning_rate": 4.920580635354917e-05,
"loss": 0.6851,
"step": 1675
},
{
"epoch": 1.1627906976744187,
"grad_norm": 15.008639335632324,
"learning_rate": 4.9110807113543096e-05,
"loss": 0.7015,
"step": 1700
},
{
"epoch": 1.179890560875513,
"grad_norm": 36.36772155761719,
"learning_rate": 4.901580787353701e-05,
"loss": 0.7014,
"step": 1725
},
{
"epoch": 1.1969904240766074,
"grad_norm": 24.153322219848633,
"learning_rate": 4.892080863353094e-05,
"loss": 0.6344,
"step": 1750
},
{
"epoch": 1.2140902872777017,
"grad_norm": 14.07002067565918,
"learning_rate": 4.8825809393524854e-05,
"loss": 0.7835,
"step": 1775
},
{
"epoch": 1.231190150478796,
"grad_norm": 7.812533378601074,
"learning_rate": 4.873081015351878e-05,
"loss": 0.5902,
"step": 1800
},
{
"epoch": 1.2482900136798905,
"grad_norm": 16.708251953125,
"learning_rate": 4.8635810913512694e-05,
"loss": 0.6682,
"step": 1825
},
{
"epoch": 1.265389876880985,
"grad_norm": 62.408294677734375,
"learning_rate": 4.854081167350661e-05,
"loss": 0.7275,
"step": 1850
},
{
"epoch": 1.2824897400820794,
"grad_norm": 11.449152946472168,
"learning_rate": 4.8445812433500535e-05,
"loss": 0.7308,
"step": 1875
},
{
"epoch": 1.2995896032831737,
"grad_norm": 6.670589447021484,
"learning_rate": 4.835081319349445e-05,
"loss": 0.6819,
"step": 1900
},
{
"epoch": 1.316689466484268,
"grad_norm": 14.783951759338379,
"learning_rate": 4.8255813953488375e-05,
"loss": 0.7014,
"step": 1925
},
{
"epoch": 1.3337893296853625,
"grad_norm": 11.176630973815918,
"learning_rate": 4.816081471348229e-05,
"loss": 0.7157,
"step": 1950
},
{
"epoch": 1.350889192886457,
"grad_norm": 14.224772453308105,
"learning_rate": 4.8065815473476216e-05,
"loss": 0.58,
"step": 1975
},
{
"epoch": 1.3679890560875512,
"grad_norm": 35.03193283081055,
"learning_rate": 4.797081623347013e-05,
"loss": 0.7904,
"step": 2000
},
{
"epoch": 1.3850889192886457,
"grad_norm": 47.37995147705078,
"learning_rate": 4.7875816993464056e-05,
"loss": 0.7543,
"step": 2025
},
{
"epoch": 1.40218878248974,
"grad_norm": 13.47080135345459,
"learning_rate": 4.778081775345797e-05,
"loss": 0.8038,
"step": 2050
},
{
"epoch": 1.4192886456908345,
"grad_norm": 5.6258158683776855,
"learning_rate": 4.76858185134519e-05,
"loss": 0.7249,
"step": 2075
},
{
"epoch": 1.4363885088919288,
"grad_norm": 8.781723976135254,
"learning_rate": 4.7590819273445814e-05,
"loss": 0.5521,
"step": 2100
},
{
"epoch": 1.4534883720930232,
"grad_norm": 7.260859966278076,
"learning_rate": 4.749582003343974e-05,
"loss": 0.5422,
"step": 2125
},
{
"epoch": 1.4705882352941178,
"grad_norm": 9.566021919250488,
"learning_rate": 4.7400820793433654e-05,
"loss": 0.7651,
"step": 2150
},
{
"epoch": 1.487688098495212,
"grad_norm": 26.22560691833496,
"learning_rate": 4.730582155342758e-05,
"loss": 0.6579,
"step": 2175
},
{
"epoch": 1.5047879616963065,
"grad_norm": 7.469398498535156,
"learning_rate": 4.7210822313421495e-05,
"loss": 0.7464,
"step": 2200
},
{
"epoch": 1.5218878248974008,
"grad_norm": 12.48919677734375,
"learning_rate": 4.711582307341542e-05,
"loss": 0.6315,
"step": 2225
},
{
"epoch": 1.5389876880984952,
"grad_norm": 7.278232097625732,
"learning_rate": 4.7020823833409335e-05,
"loss": 0.5035,
"step": 2250
},
{
"epoch": 1.5560875512995898,
"grad_norm": 20.841951370239258,
"learning_rate": 4.692582459340326e-05,
"loss": 0.7368,
"step": 2275
},
{
"epoch": 1.573187414500684,
"grad_norm": 19.840713500976562,
"learning_rate": 4.6830825353397176e-05,
"loss": 0.7734,
"step": 2300
},
{
"epoch": 1.5902872777017785,
"grad_norm": 20.479629516601562,
"learning_rate": 4.673582611339109e-05,
"loss": 0.6132,
"step": 2325
},
{
"epoch": 1.6073871409028728,
"grad_norm": 18.495811462402344,
"learning_rate": 4.664082687338501e-05,
"loss": 0.5594,
"step": 2350
},
{
"epoch": 1.6244870041039672,
"grad_norm": 8.165420532226562,
"learning_rate": 4.6545827633378933e-05,
"loss": 0.6215,
"step": 2375
},
{
"epoch": 1.6415868673050615,
"grad_norm": 9.201417922973633,
"learning_rate": 4.645082839337285e-05,
"loss": 0.5739,
"step": 2400
},
{
"epoch": 1.658686730506156,
"grad_norm": 27.07282829284668,
"learning_rate": 4.6355829153366774e-05,
"loss": 0.6288,
"step": 2425
},
{
"epoch": 1.6757865937072505,
"grad_norm": 7.3830976486206055,
"learning_rate": 4.626082991336069e-05,
"loss": 0.5158,
"step": 2450
},
{
"epoch": 1.6928864569083446,
"grad_norm": 14.039732933044434,
"learning_rate": 4.6165830673354615e-05,
"loss": 0.6536,
"step": 2475
},
{
"epoch": 1.7099863201094392,
"grad_norm": 31.472610473632812,
"learning_rate": 4.607083143334854e-05,
"loss": 0.6594,
"step": 2500
},
{
"epoch": 1.7270861833105335,
"grad_norm": 13.184996604919434,
"learning_rate": 4.5975832193342455e-05,
"loss": 0.6299,
"step": 2525
},
{
"epoch": 1.744186046511628,
"grad_norm": 5.3301286697387695,
"learning_rate": 4.588083295333638e-05,
"loss": 0.5745,
"step": 2550
},
{
"epoch": 1.7612859097127223,
"grad_norm": 5.333646774291992,
"learning_rate": 4.5785833713330296e-05,
"loss": 0.5694,
"step": 2575
},
{
"epoch": 1.7783857729138166,
"grad_norm": 22.24896240234375,
"learning_rate": 4.569083447332422e-05,
"loss": 0.5209,
"step": 2600
},
{
"epoch": 1.7954856361149112,
"grad_norm": 10.656512260437012,
"learning_rate": 4.5595835233318136e-05,
"loss": 0.5069,
"step": 2625
},
{
"epoch": 1.8125854993160053,
"grad_norm": 14.687119483947754,
"learning_rate": 4.550083599331206e-05,
"loss": 0.5486,
"step": 2650
},
{
"epoch": 1.8296853625171,
"grad_norm": 22.30952262878418,
"learning_rate": 4.540583675330598e-05,
"loss": 0.6733,
"step": 2675
},
{
"epoch": 1.8467852257181943,
"grad_norm": 9.407474517822266,
"learning_rate": 4.53108375132999e-05,
"loss": 0.4365,
"step": 2700
},
{
"epoch": 1.8638850889192886,
"grad_norm": 9.207115173339844,
"learning_rate": 4.521583827329382e-05,
"loss": 0.6675,
"step": 2725
},
{
"epoch": 1.8809849521203832,
"grad_norm": 22.828750610351562,
"learning_rate": 4.5120839033287734e-05,
"loss": 0.5888,
"step": 2750
},
{
"epoch": 1.8980848153214773,
"grad_norm": 6.441003322601318,
"learning_rate": 4.502583979328165e-05,
"loss": 0.4317,
"step": 2775
},
{
"epoch": 1.915184678522572,
"grad_norm": 7.692673683166504,
"learning_rate": 4.4930840553275575e-05,
"loss": 0.622,
"step": 2800
},
{
"epoch": 1.9322845417236663,
"grad_norm": 11.904592514038086,
"learning_rate": 4.483584131326949e-05,
"loss": 0.5188,
"step": 2825
},
{
"epoch": 1.9493844049247606,
"grad_norm": 7.662288188934326,
"learning_rate": 4.4740842073263415e-05,
"loss": 0.7301,
"step": 2850
},
{
"epoch": 1.966484268125855,
"grad_norm": 14.738003730773926,
"learning_rate": 4.464584283325733e-05,
"loss": 0.6253,
"step": 2875
},
{
"epoch": 1.9835841313269493,
"grad_norm": 19.53965950012207,
"learning_rate": 4.4550843593251256e-05,
"loss": 0.5386,
"step": 2900
},
{
"epoch": 2.0,
"eval_accuracy": 0.8098327074086719,
"eval_f1_macro": 0.5180108004626454,
"eval_f1_micro": 0.8098327074086719,
"eval_f1_weighted": 0.8111086454170916,
"eval_loss": 0.5644355416297913,
"eval_precision_macro": 0.703611295965861,
"eval_precision_micro": 0.8098327074086719,
"eval_precision_weighted": 0.8679013857559181,
"eval_recall_macro": 0.5354131711427872,
"eval_recall_micro": 0.8098327074086719,
"eval_recall_weighted": 0.8098327074086719,
"eval_runtime": 19.2316,
"eval_samples_per_second": 152.302,
"eval_steps_per_second": 9.568,
"step": 2924
},
{
"epoch": 2.000683994528044,
"grad_norm": 21.892972946166992,
"learning_rate": 4.445584435324517e-05,
"loss": 0.6068,
"step": 2925
},
{
"epoch": 2.017783857729138,
"grad_norm": 21.534278869628906,
"learning_rate": 4.4360845113239096e-05,
"loss": 0.5734,
"step": 2950
},
{
"epoch": 2.0348837209302326,
"grad_norm": 5.837319850921631,
"learning_rate": 4.426584587323301e-05,
"loss": 0.6038,
"step": 2975
},
{
"epoch": 2.0519835841313268,
"grad_norm": 9.711400985717773,
"learning_rate": 4.417084663322694e-05,
"loss": 0.5024,
"step": 3000
},
{
"epoch": 2.0690834473324213,
"grad_norm": 6.591183185577393,
"learning_rate": 4.4075847393220854e-05,
"loss": 0.6356,
"step": 3025
},
{
"epoch": 2.086183310533516,
"grad_norm": 6.523811340332031,
"learning_rate": 4.398084815321478e-05,
"loss": 0.6886,
"step": 3050
},
{
"epoch": 2.10328317373461,
"grad_norm": 14.16163158416748,
"learning_rate": 4.38858489132087e-05,
"loss": 0.4388,
"step": 3075
},
{
"epoch": 2.1203830369357046,
"grad_norm": 17.01984214782715,
"learning_rate": 4.379084967320262e-05,
"loss": 0.5274,
"step": 3100
},
{
"epoch": 2.1374829001367988,
"grad_norm": 14.736923217773438,
"learning_rate": 4.369585043319654e-05,
"loss": 0.5568,
"step": 3125
},
{
"epoch": 2.1545827633378933,
"grad_norm": 10.865409851074219,
"learning_rate": 4.360085119319046e-05,
"loss": 0.5113,
"step": 3150
},
{
"epoch": 2.1716826265389875,
"grad_norm": 22.429012298583984,
"learning_rate": 4.3505851953184376e-05,
"loss": 0.6578,
"step": 3175
},
{
"epoch": 2.188782489740082,
"grad_norm": 24.677793502807617,
"learning_rate": 4.34108527131783e-05,
"loss": 0.7861,
"step": 3200
},
{
"epoch": 2.2058823529411766,
"grad_norm": 10.499431610107422,
"learning_rate": 4.3315853473172216e-05,
"loss": 0.5801,
"step": 3225
},
{
"epoch": 2.2229822161422708,
"grad_norm": 3.640758514404297,
"learning_rate": 4.322085423316613e-05,
"loss": 0.6225,
"step": 3250
},
{
"epoch": 2.2400820793433653,
"grad_norm": 2.9993128776550293,
"learning_rate": 4.312585499316006e-05,
"loss": 0.5234,
"step": 3275
},
{
"epoch": 2.2571819425444595,
"grad_norm": 31.736900329589844,
"learning_rate": 4.3030855753153974e-05,
"loss": 0.6043,
"step": 3300
},
{
"epoch": 2.274281805745554,
"grad_norm": 24.770673751831055,
"learning_rate": 4.29358565131479e-05,
"loss": 0.468,
"step": 3325
},
{
"epoch": 2.2913816689466486,
"grad_norm": 16.497438430786133,
"learning_rate": 4.2840857273141814e-05,
"loss": 0.6247,
"step": 3350
},
{
"epoch": 2.3084815321477428,
"grad_norm": 11.79759693145752,
"learning_rate": 4.274585803313574e-05,
"loss": 0.4275,
"step": 3375
},
{
"epoch": 2.3255813953488373,
"grad_norm": 7.504731178283691,
"learning_rate": 4.2650858793129655e-05,
"loss": 0.5987,
"step": 3400
},
{
"epoch": 2.3426812585499315,
"grad_norm": 9.874656677246094,
"learning_rate": 4.2559659522723825e-05,
"loss": 0.3649,
"step": 3425
},
{
"epoch": 2.359781121751026,
"grad_norm": 4.051993370056152,
"learning_rate": 4.246466028271774e-05,
"loss": 0.6407,
"step": 3450
},
{
"epoch": 2.37688098495212,
"grad_norm": 25.524431228637695,
"learning_rate": 4.236966104271166e-05,
"loss": 0.6522,
"step": 3475
},
{
"epoch": 2.3939808481532148,
"grad_norm": 10.878904342651367,
"learning_rate": 4.2274661802705576e-05,
"loss": 0.568,
"step": 3500
},
{
"epoch": 2.4110807113543093,
"grad_norm": 26.06123924255371,
"learning_rate": 4.21796625626995e-05,
"loss": 0.4338,
"step": 3525
},
{
"epoch": 2.4281805745554035,
"grad_norm": 9.708687782287598,
"learning_rate": 4.2084663322693416e-05,
"loss": 0.5463,
"step": 3550
},
{
"epoch": 2.445280437756498,
"grad_norm": 25.002485275268555,
"learning_rate": 4.198966408268734e-05,
"loss": 0.4026,
"step": 3575
},
{
"epoch": 2.462380300957592,
"grad_norm": 27.914440155029297,
"learning_rate": 4.189466484268126e-05,
"loss": 0.4373,
"step": 3600
},
{
"epoch": 2.4794801641586868,
"grad_norm": 16.424388885498047,
"learning_rate": 4.179966560267518e-05,
"loss": 0.6144,
"step": 3625
},
{
"epoch": 2.496580027359781,
"grad_norm": 18.099689483642578,
"learning_rate": 4.17046663626691e-05,
"loss": 0.4678,
"step": 3650
},
{
"epoch": 2.5136798905608755,
"grad_norm": 7.258431434631348,
"learning_rate": 4.160966712266302e-05,
"loss": 0.5745,
"step": 3675
},
{
"epoch": 2.53077975376197,
"grad_norm": 12.761260986328125,
"learning_rate": 4.151466788265694e-05,
"loss": 0.564,
"step": 3700
},
{
"epoch": 2.547879616963064,
"grad_norm": 10.828967094421387,
"learning_rate": 4.141966864265086e-05,
"loss": 0.5247,
"step": 3725
},
{
"epoch": 2.5649794801641588,
"grad_norm": 8.467166900634766,
"learning_rate": 4.1324669402644785e-05,
"loss": 0.6447,
"step": 3750
},
{
"epoch": 2.582079343365253,
"grad_norm": 5.6609883308410645,
"learning_rate": 4.12296701626387e-05,
"loss": 0.4998,
"step": 3775
},
{
"epoch": 2.5991792065663475,
"grad_norm": 8.889337539672852,
"learning_rate": 4.1134670922632626e-05,
"loss": 0.6064,
"step": 3800
},
{
"epoch": 2.616279069767442,
"grad_norm": 10.798035621643066,
"learning_rate": 4.103967168262654e-05,
"loss": 0.4447,
"step": 3825
},
{
"epoch": 2.633378932968536,
"grad_norm": 5.452834129333496,
"learning_rate": 4.0944672442620466e-05,
"loss": 0.5188,
"step": 3850
},
{
"epoch": 2.650478796169631,
"grad_norm": 21.596166610717773,
"learning_rate": 4.084967320261438e-05,
"loss": 0.486,
"step": 3875
},
{
"epoch": 2.667578659370725,
"grad_norm": 27.14288330078125,
"learning_rate": 4.07546739626083e-05,
"loss": 0.586,
"step": 3900
},
{
"epoch": 2.6846785225718195,
"grad_norm": 6.16865873336792,
"learning_rate": 4.065967472260222e-05,
"loss": 0.5886,
"step": 3925
},
{
"epoch": 2.701778385772914,
"grad_norm": 10.905616760253906,
"learning_rate": 4.056467548259614e-05,
"loss": 0.5854,
"step": 3950
},
{
"epoch": 2.718878248974008,
"grad_norm": 23.615705490112305,
"learning_rate": 4.046967624259006e-05,
"loss": 0.5838,
"step": 3975
},
{
"epoch": 2.7359781121751023,
"grad_norm": 20.192031860351562,
"learning_rate": 4.037467700258398e-05,
"loss": 0.4683,
"step": 4000
},
{
"epoch": 2.753077975376197,
"grad_norm": 4.011788845062256,
"learning_rate": 4.02796777625779e-05,
"loss": 0.4985,
"step": 4025
},
{
"epoch": 2.7701778385772915,
"grad_norm": 4.153777122497559,
"learning_rate": 4.018467852257182e-05,
"loss": 0.654,
"step": 4050
},
{
"epoch": 2.7872777017783856,
"grad_norm": 7.651889324188232,
"learning_rate": 4.008967928256574e-05,
"loss": 0.5508,
"step": 4075
},
{
"epoch": 2.80437756497948,
"grad_norm": 4.187475681304932,
"learning_rate": 3.999468004255966e-05,
"loss": 0.5831,
"step": 4100
},
{
"epoch": 2.8214774281805743,
"grad_norm": 15.8203763961792,
"learning_rate": 3.989968080255358e-05,
"loss": 0.5752,
"step": 4125
},
{
"epoch": 2.838577291381669,
"grad_norm": 3.2058730125427246,
"learning_rate": 3.98046815625475e-05,
"loss": 0.5395,
"step": 4150
},
{
"epoch": 2.8556771545827635,
"grad_norm": 20.272655487060547,
"learning_rate": 3.970968232254142e-05,
"loss": 0.6421,
"step": 4175
},
{
"epoch": 2.8727770177838576,
"grad_norm": 12.041251182556152,
"learning_rate": 3.961468308253534e-05,
"loss": 0.5199,
"step": 4200
},
{
"epoch": 2.889876880984952,
"grad_norm": 8.457215309143066,
"learning_rate": 3.951968384252926e-05,
"loss": 0.3992,
"step": 4225
},
{
"epoch": 2.9069767441860463,
"grad_norm": 7.0436787605285645,
"learning_rate": 3.9424684602523184e-05,
"loss": 0.4758,
"step": 4250
},
{
"epoch": 2.924076607387141,
"grad_norm": 14.91028881072998,
"learning_rate": 3.93296853625171e-05,
"loss": 0.565,
"step": 4275
},
{
"epoch": 2.9411764705882355,
"grad_norm": 6.1303229331970215,
"learning_rate": 3.9234686122511024e-05,
"loss": 0.4658,
"step": 4300
},
{
"epoch": 2.9582763337893296,
"grad_norm": 3.7947280406951904,
"learning_rate": 3.913968688250494e-05,
"loss": 0.4188,
"step": 4325
},
{
"epoch": 2.975376196990424,
"grad_norm": 6.020320415496826,
"learning_rate": 3.9044687642498865e-05,
"loss": 0.5847,
"step": 4350
},
{
"epoch": 2.9924760601915183,
"grad_norm": 3.9531519412994385,
"learning_rate": 3.894968840249278e-05,
"loss": 0.5847,
"step": 4375
},
{
"epoch": 3.0,
"eval_accuracy": 0.9061113007852509,
"eval_f1_macro": 0.7552413664079601,
"eval_f1_micro": 0.9061113007852509,
"eval_f1_weighted": 0.9035670983782715,
"eval_loss": 0.2802155911922455,
"eval_precision_macro": 0.8498221458985794,
"eval_precision_micro": 0.9061113007852509,
"eval_precision_weighted": 0.9095288940815534,
"eval_recall_macro": 0.7266443770545475,
"eval_recall_micro": 0.9061113007852509,
"eval_recall_weighted": 0.9061113007852509,
"eval_runtime": 19.2634,
"eval_samples_per_second": 152.05,
"eval_steps_per_second": 9.552,
"step": 4386
}
],
"logging_steps": 25,
"max_steps": 14620,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"total_flos": 1.1417490381074596e+18,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}