{
"best_metric": 0.6991409248766222,
"best_model_checkpoint": "vit-base-patch16-224-in21k-finetuned-hongrui_mammogram_v_1/checkpoint-1710",
"epoch": 10.0,
"eval_steps": 500,
"global_step": 1710,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05847953216374269,
"grad_norm": 1.5846091508865356,
"learning_rate": 2.9239766081871343e-06,
"loss": 1.3844,
"step": 10
},
{
"epoch": 0.11695906432748537,
"grad_norm": 1.26529061794281,
"learning_rate": 5.8479532163742686e-06,
"loss": 1.3401,
"step": 20
},
{
"epoch": 0.17543859649122806,
"grad_norm": 1.0668294429779053,
"learning_rate": 8.771929824561403e-06,
"loss": 1.2478,
"step": 30
},
{
"epoch": 0.23391812865497075,
"grad_norm": 0.7071924805641174,
"learning_rate": 1.1695906432748537e-05,
"loss": 1.1693,
"step": 40
},
{
"epoch": 0.29239766081871343,
"grad_norm": 0.7040536403656006,
"learning_rate": 1.4619883040935673e-05,
"loss": 1.0874,
"step": 50
},
{
"epoch": 0.3508771929824561,
"grad_norm": 0.5056448578834534,
"learning_rate": 1.7543859649122806e-05,
"loss": 1.0551,
"step": 60
},
{
"epoch": 0.4093567251461988,
"grad_norm": 0.6976526975631714,
"learning_rate": 2.046783625730994e-05,
"loss": 1.0349,
"step": 70
},
{
"epoch": 0.4678362573099415,
"grad_norm": 0.6909885406494141,
"learning_rate": 2.3391812865497074e-05,
"loss": 0.9862,
"step": 80
},
{
"epoch": 0.5263157894736842,
"grad_norm": 1.2905045747756958,
"learning_rate": 2.6315789473684212e-05,
"loss": 0.9897,
"step": 90
},
{
"epoch": 0.5847953216374269,
"grad_norm": 1.0667847394943237,
"learning_rate": 2.9239766081871346e-05,
"loss": 0.9316,
"step": 100
},
{
"epoch": 0.6432748538011696,
"grad_norm": 0.7333235144615173,
"learning_rate": 3.216374269005848e-05,
"loss": 0.9224,
"step": 110
},
{
"epoch": 0.7017543859649122,
"grad_norm": 0.8830112814903259,
"learning_rate": 3.508771929824561e-05,
"loss": 0.9127,
"step": 120
},
{
"epoch": 0.7602339181286549,
"grad_norm": 0.7408013343811035,
"learning_rate": 3.8011695906432746e-05,
"loss": 0.9349,
"step": 130
},
{
"epoch": 0.8187134502923976,
"grad_norm": 0.6464580297470093,
"learning_rate": 4.093567251461988e-05,
"loss": 0.9046,
"step": 140
},
{
"epoch": 0.8771929824561403,
"grad_norm": 0.9568632245063782,
"learning_rate": 4.3859649122807014e-05,
"loss": 0.8981,
"step": 150
},
{
"epoch": 0.935672514619883,
"grad_norm": 0.755339503288269,
"learning_rate": 4.678362573099415e-05,
"loss": 0.9012,
"step": 160
},
{
"epoch": 0.9941520467836257,
"grad_norm": 0.8921021819114685,
"learning_rate": 4.970760233918128e-05,
"loss": 0.8576,
"step": 170
},
{
"epoch": 1.0,
"eval_accuracy": 0.6677938219703893,
"eval_f1": 0.6066566978834349,
"eval_loss": 0.8430724143981934,
"eval_precision": 0.7751300124185229,
"eval_recall": 0.6677938219703893,
"eval_runtime": 78.2201,
"eval_samples_per_second": 139.887,
"eval_steps_per_second": 2.186,
"step": 171
},
{
"epoch": 1.0526315789473684,
"grad_norm": 0.7332887053489685,
"learning_rate": 4.970760233918128e-05,
"loss": 0.8527,
"step": 180
},
{
"epoch": 1.1111111111111112,
"grad_norm": 0.776443362236023,
"learning_rate": 4.938271604938271e-05,
"loss": 0.8561,
"step": 190
},
{
"epoch": 1.1695906432748537,
"grad_norm": 0.7149679660797119,
"learning_rate": 4.9057829759584143e-05,
"loss": 0.8435,
"step": 200
},
{
"epoch": 1.2280701754385965,
"grad_norm": 0.8708255290985107,
"learning_rate": 4.8732943469785574e-05,
"loss": 0.8332,
"step": 210
},
{
"epoch": 1.286549707602339,
"grad_norm": 0.8141400814056396,
"learning_rate": 4.8408057179987004e-05,
"loss": 0.8377,
"step": 220
},
{
"epoch": 1.345029239766082,
"grad_norm": 1.188214659690857,
"learning_rate": 4.8083170890188434e-05,
"loss": 0.8456,
"step": 230
},
{
"epoch": 1.4035087719298245,
"grad_norm": 0.6589232087135315,
"learning_rate": 4.7758284600389865e-05,
"loss": 0.8387,
"step": 240
},
{
"epoch": 1.4619883040935673,
"grad_norm": 0.9304301142692566,
"learning_rate": 4.7433398310591295e-05,
"loss": 0.8521,
"step": 250
},
{
"epoch": 1.52046783625731,
"grad_norm": 0.8288267254829407,
"learning_rate": 4.7108512020792725e-05,
"loss": 0.8453,
"step": 260
},
{
"epoch": 1.5789473684210527,
"grad_norm": 0.8100181221961975,
"learning_rate": 4.678362573099415e-05,
"loss": 0.8398,
"step": 270
},
{
"epoch": 1.6374269005847952,
"grad_norm": 1.2519994974136353,
"learning_rate": 4.645873944119558e-05,
"loss": 0.835,
"step": 280
},
{
"epoch": 1.695906432748538,
"grad_norm": 0.868988573551178,
"learning_rate": 4.613385315139701e-05,
"loss": 0.8,
"step": 290
},
{
"epoch": 1.7543859649122808,
"grad_norm": 0.7534909844398499,
"learning_rate": 4.580896686159844e-05,
"loss": 0.8103,
"step": 300
},
{
"epoch": 1.8128654970760234,
"grad_norm": 0.9954193234443665,
"learning_rate": 4.548408057179987e-05,
"loss": 0.8065,
"step": 310
},
{
"epoch": 1.871345029239766,
"grad_norm": 0.6459550857543945,
"learning_rate": 4.51591942820013e-05,
"loss": 0.8196,
"step": 320
},
{
"epoch": 1.9298245614035088,
"grad_norm": 0.9698415398597717,
"learning_rate": 4.483430799220273e-05,
"loss": 0.8464,
"step": 330
},
{
"epoch": 1.9883040935672516,
"grad_norm": 0.9862537980079651,
"learning_rate": 4.450942170240416e-05,
"loss": 0.8297,
"step": 340
},
{
"epoch": 2.0,
"eval_accuracy": 0.6791263023213306,
"eval_f1": 0.6182249859592962,
"eval_loss": 0.796485424041748,
"eval_precision": 0.6757921489303887,
"eval_recall": 0.6791263023213306,
"eval_runtime": 78.3952,
"eval_samples_per_second": 139.575,
"eval_steps_per_second": 2.181,
"step": 342
},
{
"epoch": 2.046783625730994,
"grad_norm": 0.7553840279579163,
"learning_rate": 4.418453541260559e-05,
"loss": 0.8262,
"step": 350
},
{
"epoch": 2.1052631578947367,
"grad_norm": 0.8275452852249146,
"learning_rate": 4.3859649122807014e-05,
"loss": 0.809,
"step": 360
},
{
"epoch": 2.1637426900584797,
"grad_norm": 0.6450644731521606,
"learning_rate": 4.3534762833008445e-05,
"loss": 0.7939,
"step": 370
},
{
"epoch": 2.2222222222222223,
"grad_norm": 0.6809207201004028,
"learning_rate": 4.3209876543209875e-05,
"loss": 0.8157,
"step": 380
},
{
"epoch": 2.280701754385965,
"grad_norm": 0.9741197228431702,
"learning_rate": 4.2884990253411305e-05,
"loss": 0.8126,
"step": 390
},
{
"epoch": 2.3391812865497075,
"grad_norm": 0.6929029822349548,
"learning_rate": 4.2560103963612735e-05,
"loss": 0.8155,
"step": 400
},
{
"epoch": 2.39766081871345,
"grad_norm": 0.836373507976532,
"learning_rate": 4.2235217673814166e-05,
"loss": 0.8145,
"step": 410
},
{
"epoch": 2.456140350877193,
"grad_norm": 0.7719972729682922,
"learning_rate": 4.1910331384015596e-05,
"loss": 0.7963,
"step": 420
},
{
"epoch": 2.5146198830409356,
"grad_norm": 0.5631088018417358,
"learning_rate": 4.1585445094217026e-05,
"loss": 0.7839,
"step": 430
},
{
"epoch": 2.573099415204678,
"grad_norm": 0.7167489528656006,
"learning_rate": 4.1260558804418457e-05,
"loss": 0.7837,
"step": 440
},
{
"epoch": 2.6315789473684212,
"grad_norm": 0.8713414072990417,
"learning_rate": 4.093567251461988e-05,
"loss": 0.8046,
"step": 450
},
{
"epoch": 2.690058479532164,
"grad_norm": 1.0731910467147827,
"learning_rate": 4.061078622482131e-05,
"loss": 0.7813,
"step": 460
},
{
"epoch": 2.7485380116959064,
"grad_norm": 0.6702953577041626,
"learning_rate": 4.028589993502274e-05,
"loss": 0.8076,
"step": 470
},
{
"epoch": 2.807017543859649,
"grad_norm": 0.6061651706695557,
"learning_rate": 3.996101364522417e-05,
"loss": 0.786,
"step": 480
},
{
"epoch": 2.8654970760233915,
"grad_norm": 0.9429291486740112,
"learning_rate": 3.96361273554256e-05,
"loss": 0.817,
"step": 490
},
{
"epoch": 2.9239766081871346,
"grad_norm": 0.7229118347167969,
"learning_rate": 3.931124106562703e-05,
"loss": 0.8003,
"step": 500
},
{
"epoch": 2.982456140350877,
"grad_norm": 0.702900230884552,
"learning_rate": 3.898635477582846e-05,
"loss": 0.8303,
"step": 510
},
{
"epoch": 3.0,
"eval_accuracy": 0.6842441966733687,
"eval_f1": 0.6360238176585905,
"eval_loss": 0.7872248888015747,
"eval_precision": 0.6704334160874683,
"eval_recall": 0.6842441966733687,
"eval_runtime": 77.468,
"eval_samples_per_second": 141.245,
"eval_steps_per_second": 2.207,
"step": 513
},
{
"epoch": 3.0409356725146197,
"grad_norm": 0.6098015308380127,
"learning_rate": 3.866146848602989e-05,
"loss": 0.7717,
"step": 520
},
{
"epoch": 3.0994152046783627,
"grad_norm": 0.7494928240776062,
"learning_rate": 3.8336582196231315e-05,
"loss": 0.7807,
"step": 530
},
{
"epoch": 3.1578947368421053,
"grad_norm": 0.8039376735687256,
"learning_rate": 3.8011695906432746e-05,
"loss": 0.79,
"step": 540
},
{
"epoch": 3.216374269005848,
"grad_norm": 0.687044084072113,
"learning_rate": 3.7686809616634176e-05,
"loss": 0.8004,
"step": 550
},
{
"epoch": 3.2748538011695905,
"grad_norm": 0.9368821978569031,
"learning_rate": 3.7361923326835606e-05,
"loss": 0.8002,
"step": 560
},
{
"epoch": 3.3333333333333335,
"grad_norm": 1.0392132997512817,
"learning_rate": 3.7037037037037037e-05,
"loss": 0.8042,
"step": 570
},
{
"epoch": 3.391812865497076,
"grad_norm": 0.5632928013801575,
"learning_rate": 3.671215074723847e-05,
"loss": 0.7746,
"step": 580
},
{
"epoch": 3.4502923976608186,
"grad_norm": 0.6281052231788635,
"learning_rate": 3.63872644574399e-05,
"loss": 0.7691,
"step": 590
},
{
"epoch": 3.5087719298245617,
"grad_norm": 0.6072255969047546,
"learning_rate": 3.606237816764133e-05,
"loss": 0.7552,
"step": 600
},
{
"epoch": 3.5672514619883042,
"grad_norm": 0.6243124604225159,
"learning_rate": 3.573749187784276e-05,
"loss": 0.7905,
"step": 610
},
{
"epoch": 3.625730994152047,
"grad_norm": 0.8322011828422546,
"learning_rate": 3.541260558804418e-05,
"loss": 0.7772,
"step": 620
},
{
"epoch": 3.6842105263157894,
"grad_norm": 0.9370966553688049,
"learning_rate": 3.508771929824561e-05,
"loss": 0.7683,
"step": 630
},
{
"epoch": 3.742690058479532,
"grad_norm": 0.6632276177406311,
"learning_rate": 3.476283300844704e-05,
"loss": 0.7853,
"step": 640
},
{
"epoch": 3.801169590643275,
"grad_norm": 0.678115963935852,
"learning_rate": 3.443794671864847e-05,
"loss": 0.7691,
"step": 650
},
{
"epoch": 3.8596491228070176,
"grad_norm": 0.6135697960853577,
"learning_rate": 3.41130604288499e-05,
"loss": 0.7778,
"step": 660
},
{
"epoch": 3.91812865497076,
"grad_norm": 0.6042221188545227,
"learning_rate": 3.378817413905133e-05,
"loss": 0.7983,
"step": 670
},
{
"epoch": 3.976608187134503,
"grad_norm": 0.8058825135231018,
"learning_rate": 3.346328784925276e-05,
"loss": 0.7814,
"step": 680
},
{
"epoch": 4.0,
"eval_accuracy": 0.6843355876439408,
"eval_f1": 0.659727957333552,
"eval_loss": 0.7717081308364868,
"eval_precision": 0.6601378633948723,
"eval_recall": 0.6843355876439408,
"eval_runtime": 78.1449,
"eval_samples_per_second": 140.022,
"eval_steps_per_second": 2.188,
"step": 684
},
{
"epoch": 4.035087719298246,
"grad_norm": 0.966820478439331,
"learning_rate": 3.313840155945419e-05,
"loss": 0.7736,
"step": 690
},
{
"epoch": 4.093567251461988,
"grad_norm": 1.1176632642745972,
"learning_rate": 3.281351526965562e-05,
"loss": 0.7529,
"step": 700
},
{
"epoch": 4.152046783625731,
"grad_norm": 0.6780201196670532,
"learning_rate": 3.248862897985705e-05,
"loss": 0.7722,
"step": 710
},
{
"epoch": 4.2105263157894735,
"grad_norm": 0.8257865905761719,
"learning_rate": 3.216374269005848e-05,
"loss": 0.771,
"step": 720
},
{
"epoch": 4.269005847953216,
"grad_norm": 0.6105393767356873,
"learning_rate": 3.183885640025991e-05,
"loss": 0.7883,
"step": 730
},
{
"epoch": 4.3274853801169595,
"grad_norm": 0.8537980914115906,
"learning_rate": 3.151397011046134e-05,
"loss": 0.7698,
"step": 740
},
{
"epoch": 4.385964912280702,
"grad_norm": 0.8124959468841553,
"learning_rate": 3.118908382066277e-05,
"loss": 0.7737,
"step": 750
},
{
"epoch": 4.444444444444445,
"grad_norm": 0.8992810845375061,
"learning_rate": 3.08641975308642e-05,
"loss": 0.7809,
"step": 760
},
{
"epoch": 4.502923976608187,
"grad_norm": 0.6706241965293884,
"learning_rate": 3.053931124106563e-05,
"loss": 0.7629,
"step": 770
},
{
"epoch": 4.56140350877193,
"grad_norm": 0.8107186555862427,
"learning_rate": 3.0214424951267055e-05,
"loss": 0.7683,
"step": 780
},
{
"epoch": 4.619883040935672,
"grad_norm": 0.7054151296615601,
"learning_rate": 2.9889538661468486e-05,
"loss": 0.7367,
"step": 790
},
{
"epoch": 4.678362573099415,
"grad_norm": 0.7284806966781616,
"learning_rate": 2.9564652371669916e-05,
"loss": 0.7476,
"step": 800
},
{
"epoch": 4.7368421052631575,
"grad_norm": 1.1839812994003296,
"learning_rate": 2.9239766081871346e-05,
"loss": 0.7565,
"step": 810
},
{
"epoch": 4.7953216374269,
"grad_norm": 0.7781530618667603,
"learning_rate": 2.8914879792072773e-05,
"loss": 0.7737,
"step": 820
},
{
"epoch": 4.853801169590644,
"grad_norm": 0.7338679432868958,
"learning_rate": 2.8589993502274203e-05,
"loss": 0.7519,
"step": 830
},
{
"epoch": 4.912280701754386,
"grad_norm": 1.015286922454834,
"learning_rate": 2.8265107212475634e-05,
"loss": 0.8023,
"step": 840
},
{
"epoch": 4.970760233918129,
"grad_norm": 0.8456217050552368,
"learning_rate": 2.7940220922677064e-05,
"loss": 0.7768,
"step": 850
},
{
"epoch": 5.0,
"eval_accuracy": 0.6905501736428441,
"eval_f1": 0.6543647035652124,
"eval_loss": 0.7693981528282166,
"eval_precision": 0.6774590515406133,
"eval_recall": 0.6905501736428441,
"eval_runtime": 77.1215,
"eval_samples_per_second": 141.88,
"eval_steps_per_second": 2.217,
"step": 855
},
{
"epoch": 5.029239766081871,
"grad_norm": 0.624717116355896,
"learning_rate": 2.761533463287849e-05,
"loss": 0.7482,
"step": 860
},
{
"epoch": 5.087719298245614,
"grad_norm": 0.752734363079071,
"learning_rate": 2.729044834307992e-05,
"loss": 0.7259,
"step": 870
},
{
"epoch": 5.146198830409356,
"grad_norm": 0.6503344178199768,
"learning_rate": 2.696556205328135e-05,
"loss": 0.7488,
"step": 880
},
{
"epoch": 5.204678362573099,
"grad_norm": 0.8620956540107727,
"learning_rate": 2.664067576348278e-05,
"loss": 0.7704,
"step": 890
},
{
"epoch": 5.2631578947368425,
"grad_norm": 0.6938666701316833,
"learning_rate": 2.6315789473684212e-05,
"loss": 0.76,
"step": 900
},
{
"epoch": 5.321637426900585,
"grad_norm": 0.8206263184547424,
"learning_rate": 2.599090318388564e-05,
"loss": 0.7685,
"step": 910
},
{
"epoch": 5.380116959064328,
"grad_norm": 0.8919401168823242,
"learning_rate": 2.566601689408707e-05,
"loss": 0.7673,
"step": 920
},
{
"epoch": 5.43859649122807,
"grad_norm": 0.9412862062454224,
"learning_rate": 2.53411306042885e-05,
"loss": 0.7403,
"step": 930
},
{
"epoch": 5.497076023391813,
"grad_norm": 1.1093353033065796,
"learning_rate": 2.501624431448993e-05,
"loss": 0.743,
"step": 940
},
{
"epoch": 5.555555555555555,
"grad_norm": 0.6838064193725586,
"learning_rate": 2.4691358024691357e-05,
"loss": 0.7641,
"step": 950
},
{
"epoch": 5.614035087719298,
"grad_norm": 0.7546567320823669,
"learning_rate": 2.4366471734892787e-05,
"loss": 0.7648,
"step": 960
},
{
"epoch": 5.6725146198830405,
"grad_norm": 0.7909204363822937,
"learning_rate": 2.4041585445094217e-05,
"loss": 0.7872,
"step": 970
},
{
"epoch": 5.730994152046784,
"grad_norm": 0.6969336867332458,
"learning_rate": 2.3716699155295647e-05,
"loss": 0.7638,
"step": 980
},
{
"epoch": 5.7894736842105265,
"grad_norm": 0.7838913202285767,
"learning_rate": 2.3391812865497074e-05,
"loss": 0.747,
"step": 990
},
{
"epoch": 5.847953216374269,
"grad_norm": 0.8347417712211609,
"learning_rate": 2.3066926575698505e-05,
"loss": 0.7749,
"step": 1000
},
{
"epoch": 5.906432748538012,
"grad_norm": 0.9646545052528381,
"learning_rate": 2.2742040285899935e-05,
"loss": 0.737,
"step": 1010
},
{
"epoch": 5.964912280701754,
"grad_norm": 0.6134990453720093,
"learning_rate": 2.2417153996101365e-05,
"loss": 0.7415,
"step": 1020
},
{
"epoch": 6.0,
"eval_accuracy": 0.6962164138183148,
"eval_f1": 0.671796652878038,
"eval_loss": 0.7572136521339417,
"eval_precision": 0.6763761787994358,
"eval_recall": 0.6962164138183148,
"eval_runtime": 77.9563,
"eval_samples_per_second": 140.361,
"eval_steps_per_second": 2.194,
"step": 1026
},
{
"epoch": 6.023391812865497,
"grad_norm": 0.7049497961997986,
"learning_rate": 2.2092267706302795e-05,
"loss": 0.7598,
"step": 1030
},
{
"epoch": 6.081871345029239,
"grad_norm": 0.9780289530754089,
"learning_rate": 2.1767381416504222e-05,
"loss": 0.7472,
"step": 1040
},
{
"epoch": 6.140350877192983,
"grad_norm": 0.7058891654014587,
"learning_rate": 2.1442495126705653e-05,
"loss": 0.742,
"step": 1050
},
{
"epoch": 6.1988304093567255,
"grad_norm": 0.8734349012374878,
"learning_rate": 2.1117608836907083e-05,
"loss": 0.7581,
"step": 1060
},
{
"epoch": 6.257309941520468,
"grad_norm": 0.8839743733406067,
"learning_rate": 2.0792722547108513e-05,
"loss": 0.7516,
"step": 1070
},
{
"epoch": 6.315789473684211,
"grad_norm": 0.6963735222816467,
"learning_rate": 2.046783625730994e-05,
"loss": 0.7412,
"step": 1080
},
{
"epoch": 6.374269005847953,
"grad_norm": 0.9337784647941589,
"learning_rate": 2.014294996751137e-05,
"loss": 0.7402,
"step": 1090
},
{
"epoch": 6.432748538011696,
"grad_norm": 0.6648013591766357,
"learning_rate": 1.98180636777128e-05,
"loss": 0.7513,
"step": 1100
},
{
"epoch": 6.491228070175438,
"grad_norm": 1.072342872619629,
"learning_rate": 1.949317738791423e-05,
"loss": 0.7406,
"step": 1110
},
{
"epoch": 6.549707602339181,
"grad_norm": 1.0100135803222656,
"learning_rate": 1.9168291098115658e-05,
"loss": 0.7643,
"step": 1120
},
{
"epoch": 6.6081871345029235,
"grad_norm": 0.7687884569168091,
"learning_rate": 1.8843404808317088e-05,
"loss": 0.7404,
"step": 1130
},
{
"epoch": 6.666666666666667,
"grad_norm": 0.750688374042511,
"learning_rate": 1.8518518518518518e-05,
"loss": 0.7391,
"step": 1140
},
{
"epoch": 6.7251461988304095,
"grad_norm": 0.7010438442230225,
"learning_rate": 1.819363222871995e-05,
"loss": 0.7534,
"step": 1150
},
{
"epoch": 6.783625730994152,
"grad_norm": 1.0012060403823853,
"learning_rate": 1.786874593892138e-05,
"loss": 0.7485,
"step": 1160
},
{
"epoch": 6.842105263157895,
"grad_norm": 0.8860548734664917,
"learning_rate": 1.7543859649122806e-05,
"loss": 0.7606,
"step": 1170
},
{
"epoch": 6.900584795321637,
"grad_norm": 0.969633936882019,
"learning_rate": 1.7218973359324236e-05,
"loss": 0.7592,
"step": 1180
},
{
"epoch": 6.95906432748538,
"grad_norm": 0.8473331928253174,
"learning_rate": 1.6894087069525666e-05,
"loss": 0.7351,
"step": 1190
},
{
"epoch": 7.0,
"eval_accuracy": 0.692195211113142,
"eval_f1": 0.6568623393542826,
"eval_loss": 0.754936695098877,
"eval_precision": 0.6648205901494669,
"eval_recall": 0.692195211113142,
"eval_runtime": 77.9138,
"eval_samples_per_second": 140.437,
"eval_steps_per_second": 2.195,
"step": 1197
},
{
"epoch": 7.017543859649122,
"grad_norm": 0.7504809498786926,
"learning_rate": 1.6569200779727097e-05,
"loss": 0.7102,
"step": 1200
},
{
"epoch": 7.076023391812866,
"grad_norm": 1.306260585784912,
"learning_rate": 1.6244314489928523e-05,
"loss": 0.7574,
"step": 1210
},
{
"epoch": 7.1345029239766085,
"grad_norm": 0.7214799523353577,
"learning_rate": 1.5919428200129954e-05,
"loss": 0.7355,
"step": 1220
},
{
"epoch": 7.192982456140351,
"grad_norm": 0.8254335522651672,
"learning_rate": 1.5594541910331384e-05,
"loss": 0.758,
"step": 1230
},
{
"epoch": 7.251461988304094,
"grad_norm": 0.8644353747367859,
"learning_rate": 1.5269655620532814e-05,
"loss": 0.7131,
"step": 1240
},
{
"epoch": 7.309941520467836,
"grad_norm": 0.7876085638999939,
"learning_rate": 1.4944769330734243e-05,
"loss": 0.7437,
"step": 1250
},
{
"epoch": 7.368421052631579,
"grad_norm": 0.904586136341095,
"learning_rate": 1.4619883040935673e-05,
"loss": 0.7464,
"step": 1260
},
{
"epoch": 7.426900584795321,
"grad_norm": 1.033260464668274,
"learning_rate": 1.4294996751137102e-05,
"loss": 0.7226,
"step": 1270
},
{
"epoch": 7.485380116959064,
"grad_norm": 0.9057112336158752,
"learning_rate": 1.3970110461338532e-05,
"loss": 0.7425,
"step": 1280
},
{
"epoch": 7.543859649122807,
"grad_norm": 0.8631776571273804,
"learning_rate": 1.364522417153996e-05,
"loss": 0.7178,
"step": 1290
},
{
"epoch": 7.60233918128655,
"grad_norm": 0.8566320538520813,
"learning_rate": 1.332033788174139e-05,
"loss": 0.7151,
"step": 1300
},
{
"epoch": 7.6608187134502925,
"grad_norm": 1.056127905845642,
"learning_rate": 1.299545159194282e-05,
"loss": 0.7574,
"step": 1310
},
{
"epoch": 7.719298245614035,
"grad_norm": 1.0582066774368286,
"learning_rate": 1.267056530214425e-05,
"loss": 0.7462,
"step": 1320
},
{
"epoch": 7.777777777777778,
"grad_norm": 1.0808275938034058,
"learning_rate": 1.2345679012345678e-05,
"loss": 0.7181,
"step": 1330
},
{
"epoch": 7.83625730994152,
"grad_norm": 0.8452061414718628,
"learning_rate": 1.2020792722547109e-05,
"loss": 0.7686,
"step": 1340
},
{
"epoch": 7.894736842105263,
"grad_norm": 0.7253689765930176,
"learning_rate": 1.1695906432748537e-05,
"loss": 0.7174,
"step": 1350
},
{
"epoch": 7.953216374269006,
"grad_norm": 0.9176128506660461,
"learning_rate": 1.1371020142949967e-05,
"loss": 0.7197,
"step": 1360
},
{
"epoch": 8.0,
"eval_accuracy": 0.6985925790531895,
"eval_f1": 0.6855055863067254,
"eval_loss": 0.7478834390640259,
"eval_precision": 0.6925926647987316,
"eval_recall": 0.6985925790531895,
"eval_runtime": 77.8555,
"eval_samples_per_second": 140.542,
"eval_steps_per_second": 2.196,
"step": 1368
},
{
"epoch": 8.011695906432749,
"grad_norm": 0.7897553443908691,
"learning_rate": 1.1046133853151398e-05,
"loss": 0.755,
"step": 1370
},
{
"epoch": 8.070175438596491,
"grad_norm": 0.7324469685554504,
"learning_rate": 1.0721247563352826e-05,
"loss": 0.7243,
"step": 1380
},
{
"epoch": 8.128654970760234,
"grad_norm": 0.7983306646347046,
"learning_rate": 1.0396361273554257e-05,
"loss": 0.7294,
"step": 1390
},
{
"epoch": 8.187134502923977,
"grad_norm": 0.9110460877418518,
"learning_rate": 1.0071474983755685e-05,
"loss": 0.7027,
"step": 1400
},
{
"epoch": 8.24561403508772,
"grad_norm": 0.9574342966079712,
"learning_rate": 9.746588693957115e-06,
"loss": 0.7131,
"step": 1410
},
{
"epoch": 8.304093567251462,
"grad_norm": 0.7169631719589233,
"learning_rate": 9.421702404158544e-06,
"loss": 0.7365,
"step": 1420
},
{
"epoch": 8.362573099415204,
"grad_norm": 0.9551491737365723,
"learning_rate": 9.096816114359974e-06,
"loss": 0.7313,
"step": 1430
},
{
"epoch": 8.421052631578947,
"grad_norm": 1.159575343132019,
"learning_rate": 8.771929824561403e-06,
"loss": 0.7438,
"step": 1440
},
{
"epoch": 8.47953216374269,
"grad_norm": 0.8166360259056091,
"learning_rate": 8.447043534762833e-06,
"loss": 0.7355,
"step": 1450
},
{
"epoch": 8.538011695906432,
"grad_norm": 0.8369165062904358,
"learning_rate": 8.122157244964262e-06,
"loss": 0.7183,
"step": 1460
},
{
"epoch": 8.596491228070175,
"grad_norm": 0.7923627495765686,
"learning_rate": 7.797270955165692e-06,
"loss": 0.711,
"step": 1470
},
{
"epoch": 8.654970760233919,
"grad_norm": 0.7623910903930664,
"learning_rate": 7.4723846653671214e-06,
"loss": 0.7377,
"step": 1480
},
{
"epoch": 8.713450292397662,
"grad_norm": 1.088745355606079,
"learning_rate": 7.147498375568551e-06,
"loss": 0.7199,
"step": 1490
},
{
"epoch": 8.771929824561404,
"grad_norm": 0.8672430515289307,
"learning_rate": 6.82261208576998e-06,
"loss": 0.7234,
"step": 1500
},
{
"epoch": 8.830409356725147,
"grad_norm": 0.77957683801651,
"learning_rate": 6.49772579597141e-06,
"loss": 0.7348,
"step": 1510
},
{
"epoch": 8.88888888888889,
"grad_norm": 0.982523500919342,
"learning_rate": 6.172839506172839e-06,
"loss": 0.7348,
"step": 1520
},
{
"epoch": 8.947368421052632,
"grad_norm": 0.8758224844932556,
"learning_rate": 5.8479532163742686e-06,
"loss": 0.7087,
"step": 1530
},
{
"epoch": 9.0,
"eval_accuracy": 0.6978614512886127,
"eval_f1": 0.6697434161663234,
"eval_loss": 0.744518518447876,
"eval_precision": 0.6792260519903555,
"eval_recall": 0.6978614512886127,
"eval_runtime": 77.9794,
"eval_samples_per_second": 140.319,
"eval_steps_per_second": 2.193,
"step": 1539
},
{
"epoch": 9.005847953216374,
"grad_norm": 0.7864174246788025,
"learning_rate": 5.523066926575699e-06,
"loss": 0.713,
"step": 1540
},
{
"epoch": 9.064327485380117,
"grad_norm": 1.0881294012069702,
"learning_rate": 5.198180636777128e-06,
"loss": 0.7092,
"step": 1550
},
{
"epoch": 9.12280701754386,
"grad_norm": 1.0221022367477417,
"learning_rate": 4.873294346978558e-06,
"loss": 0.7463,
"step": 1560
},
{
"epoch": 9.181286549707602,
"grad_norm": 0.8976357579231262,
"learning_rate": 4.548408057179987e-06,
"loss": 0.7392,
"step": 1570
},
{
"epoch": 9.239766081871345,
"grad_norm": 0.8547308444976807,
"learning_rate": 4.2235217673814166e-06,
"loss": 0.7275,
"step": 1580
},
{
"epoch": 9.298245614035087,
"grad_norm": 0.9341883063316345,
"learning_rate": 3.898635477582846e-06,
"loss": 0.7183,
"step": 1590
},
{
"epoch": 9.35672514619883,
"grad_norm": 0.9447769522666931,
"learning_rate": 3.5737491877842754e-06,
"loss": 0.6966,
"step": 1600
},
{
"epoch": 9.415204678362572,
"grad_norm": 1.0918306112289429,
"learning_rate": 3.248862897985705e-06,
"loss": 0.7161,
"step": 1610
},
{
"epoch": 9.473684210526315,
"grad_norm": 0.9330850839614868,
"learning_rate": 2.9239766081871343e-06,
"loss": 0.7093,
"step": 1620
},
{
"epoch": 9.53216374269006,
"grad_norm": 0.8420782685279846,
"learning_rate": 2.599090318388564e-06,
"loss": 0.7259,
"step": 1630
},
{
"epoch": 9.590643274853802,
"grad_norm": 0.9159696698188782,
"learning_rate": 2.2742040285899936e-06,
"loss": 0.7265,
"step": 1640
},
{
"epoch": 9.649122807017545,
"grad_norm": 1.0164194107055664,
"learning_rate": 1.949317738791423e-06,
"loss": 0.7458,
"step": 1650
},
{
"epoch": 9.707602339181287,
"grad_norm": 0.8425694704055786,
"learning_rate": 1.6244314489928524e-06,
"loss": 0.7149,
"step": 1660
},
{
"epoch": 9.76608187134503,
"grad_norm": 1.1017402410507202,
"learning_rate": 1.299545159194282e-06,
"loss": 0.7101,
"step": 1670
},
{
"epoch": 9.824561403508772,
"grad_norm": 0.8220164179801941,
"learning_rate": 9.746588693957115e-07,
"loss": 0.7174,
"step": 1680
},
{
"epoch": 9.883040935672515,
"grad_norm": 0.7968518733978271,
"learning_rate": 6.49772579597141e-07,
"loss": 0.7236,
"step": 1690
},
{
"epoch": 9.941520467836257,
"grad_norm": 0.7491603493690491,
"learning_rate": 3.248862897985705e-07,
"loss": 0.6978,
"step": 1700
},
{
"epoch": 10.0,
"grad_norm": 0.8862149119377136,
"learning_rate": 0.0,
"loss": 0.6977,
"step": 1710
},
{
"epoch": 10.0,
"eval_accuracy": 0.6991409248766222,
"eval_f1": 0.6766571542539821,
"eval_loss": 0.7419390082359314,
"eval_precision": 0.6830062110035815,
"eval_recall": 0.6991409248766222,
"eval_runtime": 78.4178,
"eval_samples_per_second": 139.535,
"eval_steps_per_second": 2.181,
"step": 1710
},
{
"epoch": 10.0,
"step": 1710,
"total_flos": 3.3914202248568177e+19,
"train_loss": 0.790359598014787,
"train_runtime": 7828.8109,
"train_samples_per_second": 55.901,
"train_steps_per_second": 0.218
}
],
"logging_steps": 10,
"max_steps": 1710,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.3914202248568177e+19,
"train_batch_size": 64,
"trial_name": null,
"trial_params": null
}