End of training

Browse files

Files changed (5) hide show

all_results.json +13 -0
eval_results.json +8 -0
runs/Oct26_04-05-49_4c56d22e99c3/events.out.tfevents.1729923978.4c56d22e99c3.214.1 +3 -0
train_results.json +8 -0
trainer_state.json +1182 -0

all_results.json ADDED Viewed

	@@ -0,0 +1,13 @@

+{
+    "epoch": 2.995776630689817,
+    "eval_accuracy": 0.834983498349835,
+    "eval_loss": 0.6400949358940125,
+    "eval_runtime": 121.5182,
+    "eval_samples_per_second": 62.336,
+    "eval_steps_per_second": 1.95,
+    "total_flos": 1.584209316447959e+19,
+    "train_loss": 1.1699181137825911,
+    "train_runtime": 8274.5103,
+    "train_samples_per_second": 24.717,
+    "train_steps_per_second": 0.193
+}

eval_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 2.995776630689817,
+    "eval_accuracy": 0.834983498349835,
+    "eval_loss": 0.6400949358940125,
+    "eval_runtime": 121.5182,
+    "eval_samples_per_second": 62.336,
+    "eval_steps_per_second": 1.95
+}

runs/Oct26_04-05-49_4c56d22e99c3/events.out.tfevents.1729923978.4c56d22e99c3.214.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f36dbd3d84a6965d70511f0887ea0848067d8d3a6d0b9530eac04b0cc7211425
+size 411

train_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 2.995776630689817,
+    "total_flos": 1.584209316447959e+19,
+    "train_loss": 1.1699181137825911,
+    "train_runtime": 8274.5103,
+    "train_samples_per_second": 24.717,
+    "train_steps_per_second": 0.193
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,1182 @@

+{
+  "best_metric": 0.834983498349835,
+  "best_model_checkpoint": "vit-base-patch16-224-finetuned-food101/checkpoint-1596",
+  "epoch": 2.995776630689817,
+  "eval_steps": 500,
+  "global_step": 1596,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.018770530267480056,
+      "grad_norm": 2.9933698177337646,
+      "learning_rate": 3.125e-06,
+      "loss": 4.739,
+      "step": 10
+    },
+    {
+      "epoch": 0.03754106053496011,
+      "grad_norm": 3.0962929725646973,
+      "learning_rate": 6.25e-06,
+      "loss": 4.7159,
+      "step": 20
+    },
+    {
+      "epoch": 0.05631159080244017,
+      "grad_norm": 3.2671728134155273,
+      "learning_rate": 9.375000000000001e-06,
+      "loss": 4.6674,
+      "step": 30
+    },
+    {
+      "epoch": 0.07508212106992022,
+      "grad_norm": 3.073523759841919,
+      "learning_rate": 1.25e-05,
+      "loss": 4.6293,
+      "step": 40
+    },
+    {
+      "epoch": 0.09385265133740028,
+      "grad_norm": 3.1302390098571777,
+      "learning_rate": 1.5625e-05,
+      "loss": 4.5709,
+      "step": 50
+    },
+    {
+      "epoch": 0.11262318160488034,
+      "grad_norm": 2.9043614864349365,
+      "learning_rate": 1.8750000000000002e-05,
+      "loss": 4.4634,
+      "step": 60
+    },
+    {
+      "epoch": 0.1313937118723604,
+      "grad_norm": 3.4175333976745605,
+      "learning_rate": 2.1875e-05,
+      "loss": 4.3879,
+      "step": 70
+    },
+    {
+      "epoch": 0.15016424213984045,
+      "grad_norm": 3.323888063430786,
+      "learning_rate": 2.5e-05,
+      "loss": 4.2587,
+      "step": 80
+    },
+    {
+      "epoch": 0.1689347724073205,
+      "grad_norm": 3.0213475227355957,
+      "learning_rate": 2.8125000000000003e-05,
+      "loss": 4.0885,
+      "step": 90
+    },
+    {
+      "epoch": 0.18770530267480057,
+      "grad_norm": 3.02178955078125,
+      "learning_rate": 3.125e-05,
+      "loss": 3.9028,
+      "step": 100
+    },
+    {
+      "epoch": 0.2064758329422806,
+      "grad_norm": 3.3495538234710693,
+      "learning_rate": 3.4375e-05,
+      "loss": 3.7353,
+      "step": 110
+    },
+    {
+      "epoch": 0.22524636320976069,
+      "grad_norm": 3.1515848636627197,
+      "learning_rate": 3.7500000000000003e-05,
+      "loss": 3.4697,
+      "step": 120
+    },
+    {
+      "epoch": 0.24401689347724073,
+      "grad_norm": 3.284679889678955,
+      "learning_rate": 4.0625000000000005e-05,
+      "loss": 3.2815,
+      "step": 130
+    },
+    {
+      "epoch": 0.2627874237447208,
+      "grad_norm": 3.226576805114746,
+      "learning_rate": 4.375e-05,
+      "loss": 2.9895,
+      "step": 140
+    },
+    {
+      "epoch": 0.28155795401220085,
+      "grad_norm": 3.181521415710449,
+      "learning_rate": 4.6875e-05,
+      "loss": 2.7421,
+      "step": 150
+    },
+    {
+      "epoch": 0.3003284842796809,
+      "grad_norm": 3.1563026905059814,
+      "learning_rate": 5e-05,
+      "loss": 2.587,
+      "step": 160
+    },
+    {
+      "epoch": 0.31909901454716094,
+      "grad_norm": 3.2322466373443604,
+      "learning_rate": 4.965181058495822e-05,
+      "loss": 2.3671,
+      "step": 170
+    },
+    {
+      "epoch": 0.337869544814641,
+      "grad_norm": 3.2258055210113525,
+      "learning_rate": 4.930362116991643e-05,
+      "loss": 2.1425,
+      "step": 180
+    },
+    {
+      "epoch": 0.3566400750821211,
+      "grad_norm": 3.141265392303467,
+      "learning_rate": 4.895543175487465e-05,
+      "loss": 2.0185,
+      "step": 190
+    },
+    {
+      "epoch": 0.37541060534960113,
+      "grad_norm": 3.157705307006836,
+      "learning_rate": 4.860724233983287e-05,
+      "loss": 1.9035,
+      "step": 200
+    },
+    {
+      "epoch": 0.3941811356170812,
+      "grad_norm": 3.300532102584839,
+      "learning_rate": 4.825905292479109e-05,
+      "loss": 1.8364,
+      "step": 210
+    },
+    {
+      "epoch": 0.4129516658845612,
+      "grad_norm": 3.4612457752227783,
+      "learning_rate": 4.79108635097493e-05,
+      "loss": 1.693,
+      "step": 220
+    },
+    {
+      "epoch": 0.43172219615204127,
+      "grad_norm": 3.306436777114868,
+      "learning_rate": 4.756267409470752e-05,
+      "loss": 1.5683,
+      "step": 230
+    },
+    {
+      "epoch": 0.45049272641952137,
+      "grad_norm": 3.522141695022583,
+      "learning_rate": 4.721448467966574e-05,
+      "loss": 1.5139,
+      "step": 240
+    },
+    {
+      "epoch": 0.4692632566870014,
+      "grad_norm": 3.511997938156128,
+      "learning_rate": 4.686629526462396e-05,
+      "loss": 1.4348,
+      "step": 250
+    },
+    {
+      "epoch": 0.48803378695448146,
+      "grad_norm": 3.343367576599121,
+      "learning_rate": 4.6518105849582176e-05,
+      "loss": 1.3879,
+      "step": 260
+    },
+    {
+      "epoch": 0.5068043172219615,
+      "grad_norm": 3.2004270553588867,
+      "learning_rate": 4.6169916434540394e-05,
+      "loss": 1.3787,
+      "step": 270
+    },
+    {
+      "epoch": 0.5255748474894416,
+      "grad_norm": 3.113529682159424,
+      "learning_rate": 4.582172701949861e-05,
+      "loss": 1.3503,
+      "step": 280
+    },
+    {
+      "epoch": 0.5443453777569216,
+      "grad_norm": 3.2955970764160156,
+      "learning_rate": 4.547353760445683e-05,
+      "loss": 1.3139,
+      "step": 290
+    },
+    {
+      "epoch": 0.5631159080244017,
+      "grad_norm": 3.1911182403564453,
+      "learning_rate": 4.5125348189415044e-05,
+      "loss": 1.2995,
+      "step": 300
+    },
+    {
+      "epoch": 0.5818864382918817,
+      "grad_norm": 3.416416645050049,
+      "learning_rate": 4.477715877437326e-05,
+      "loss": 1.2014,
+      "step": 310
+    },
+    {
+      "epoch": 0.6006569685593618,
+      "grad_norm": 4.6215410232543945,
+      "learning_rate": 4.442896935933148e-05,
+      "loss": 1.1872,
+      "step": 320
+    },
+    {
+      "epoch": 0.6194274988268419,
+      "grad_norm": 3.202340841293335,
+      "learning_rate": 4.40807799442897e-05,
+      "loss": 1.1448,
+      "step": 330
+    },
+    {
+      "epoch": 0.6381980290943219,
+      "grad_norm": 3.1017255783081055,
+      "learning_rate": 4.373259052924791e-05,
+      "loss": 1.2542,
+      "step": 340
+    },
+    {
+      "epoch": 0.656968559361802,
+      "grad_norm": 3.4146809577941895,
+      "learning_rate": 4.338440111420613e-05,
+      "loss": 1.143,
+      "step": 350
+    },
+    {
+      "epoch": 0.675739089629282,
+      "grad_norm": 3.4860126972198486,
+      "learning_rate": 4.303621169916435e-05,
+      "loss": 1.1936,
+      "step": 360
+    },
+    {
+      "epoch": 0.6945096198967621,
+      "grad_norm": 3.000502109527588,
+      "learning_rate": 4.268802228412256e-05,
+      "loss": 1.0896,
+      "step": 370
+    },
+    {
+      "epoch": 0.7132801501642422,
+      "grad_norm": 3.3682737350463867,
+      "learning_rate": 4.233983286908078e-05,
+      "loss": 0.9863,
+      "step": 380
+    },
+    {
+      "epoch": 0.7320506804317222,
+      "grad_norm": 2.903545379638672,
+      "learning_rate": 4.1991643454039e-05,
+      "loss": 1.0206,
+      "step": 390
+    },
+    {
+      "epoch": 0.7508212106992023,
+      "grad_norm": 3.5237250328063965,
+      "learning_rate": 4.164345403899722e-05,
+      "loss": 1.0655,
+      "step": 400
+    },
+    {
+      "epoch": 0.7695917409666823,
+      "grad_norm": 3.5761749744415283,
+      "learning_rate": 4.129526462395543e-05,
+      "loss": 1.0016,
+      "step": 410
+    },
+    {
+      "epoch": 0.7883622712341624,
+      "grad_norm": 3.3111650943756104,
+      "learning_rate": 4.094707520891365e-05,
+      "loss": 1.0225,
+      "step": 420
+    },
+    {
+      "epoch": 0.8071328015016425,
+      "grad_norm": 2.9664759635925293,
+      "learning_rate": 4.0598885793871866e-05,
+      "loss": 1.0624,
+      "step": 430
+    },
+    {
+      "epoch": 0.8259033317691225,
+      "grad_norm": 3.750908136367798,
+      "learning_rate": 4.0250696378830085e-05,
+      "loss": 1.0223,
+      "step": 440
+    },
+    {
+      "epoch": 0.8446738620366026,
+      "grad_norm": 3.176248788833618,
+      "learning_rate": 3.9902506963788303e-05,
+      "loss": 0.9834,
+      "step": 450
+    },
+    {
+      "epoch": 0.8634443923040825,
+      "grad_norm": 3.428868293762207,
+      "learning_rate": 3.955431754874652e-05,
+      "loss": 1.0338,
+      "step": 460
+    },
+    {
+      "epoch": 0.8822149225715626,
+      "grad_norm": 3.6257553100585938,
+      "learning_rate": 3.920612813370474e-05,
+      "loss": 1.0156,
+      "step": 470
+    },
+    {
+      "epoch": 0.9009854528390427,
+      "grad_norm": 3.195758104324341,
+      "learning_rate": 3.885793871866296e-05,
+      "loss": 1.0171,
+      "step": 480
+    },
+    {
+      "epoch": 0.9197559831065227,
+      "grad_norm": 2.6419994831085205,
+      "learning_rate": 3.850974930362117e-05,
+      "loss": 0.9735,
+      "step": 490
+    },
+    {
+      "epoch": 0.9385265133740028,
+      "grad_norm": 3.462597608566284,
+      "learning_rate": 3.816155988857939e-05,
+      "loss": 1.0049,
+      "step": 500
+    },
+    {
+      "epoch": 0.9572970436414828,
+      "grad_norm": 2.8139870166778564,
+      "learning_rate": 3.781337047353761e-05,
+      "loss": 0.9696,
+      "step": 510
+    },
+    {
+      "epoch": 0.9760675739089629,
+      "grad_norm": 3.027811288833618,
+      "learning_rate": 3.746518105849583e-05,
+      "loss": 0.9242,
+      "step": 520
+    },
+    {
+      "epoch": 0.994838104176443,
+      "grad_norm": 3.6760294437408447,
+      "learning_rate": 3.711699164345404e-05,
+      "loss": 0.912,
+      "step": 530
+    },
+    {
+      "epoch": 0.998592210229939,
+      "eval_accuracy": 0.7968316831683169,
+      "eval_loss": 0.8397366404533386,
+      "eval_runtime": 120.4032,
+      "eval_samples_per_second": 62.914,
+      "eval_steps_per_second": 1.968,
+      "step": 532
+    },
+    {
+      "epoch": 1.013608634443923,
+      "grad_norm": 2.9862356185913086,
+      "learning_rate": 3.676880222841226e-05,
+      "loss": 0.8398,
+      "step": 540
+    },
+    {
+      "epoch": 1.0323791647114031,
+      "grad_norm": 3.0735647678375244,
+      "learning_rate": 3.642061281337048e-05,
+      "loss": 0.8379,
+      "step": 550
+    },
+    {
+      "epoch": 1.0511496949788832,
+      "grad_norm": 3.5392494201660156,
+      "learning_rate": 3.607242339832869e-05,
+      "loss": 0.8264,
+      "step": 560
+    },
+    {
+      "epoch": 1.069920225246363,
+      "grad_norm": 3.511627435684204,
+      "learning_rate": 3.572423398328691e-05,
+      "loss": 0.8204,
+      "step": 570
+    },
+    {
+      "epoch": 1.0886907555138432,
+      "grad_norm": 3.393699884414673,
+      "learning_rate": 3.5376044568245126e-05,
+      "loss": 0.8963,
+      "step": 580
+    },
+    {
+      "epoch": 1.1074612857813233,
+      "grad_norm": 4.332097053527832,
+      "learning_rate": 3.5027855153203345e-05,
+      "loss": 0.8623,
+      "step": 590
+    },
+    {
+      "epoch": 1.1262318160488034,
+      "grad_norm": 3.1262927055358887,
+      "learning_rate": 3.4679665738161556e-05,
+      "loss": 0.8025,
+      "step": 600
+    },
+    {
+      "epoch": 1.1450023463162835,
+      "grad_norm": 3.1130638122558594,
+      "learning_rate": 3.4331476323119775e-05,
+      "loss": 0.8388,
+      "step": 610
+    },
+    {
+      "epoch": 1.1637728765837636,
+      "grad_norm": 3.509665012359619,
+      "learning_rate": 3.3983286908077994e-05,
+      "loss": 0.8306,
+      "step": 620
+    },
+    {
+      "epoch": 1.1825434068512435,
+      "grad_norm": 3.595564842224121,
+      "learning_rate": 3.363509749303621e-05,
+      "loss": 0.8241,
+      "step": 630
+    },
+    {
+      "epoch": 1.2013139371187236,
+      "grad_norm": 2.9736037254333496,
+      "learning_rate": 3.328690807799443e-05,
+      "loss": 0.8333,
+      "step": 640
+    },
+    {
+      "epoch": 1.2200844673862037,
+      "grad_norm": 3.5477538108825684,
+      "learning_rate": 3.293871866295265e-05,
+      "loss": 0.84,
+      "step": 650
+    },
+    {
+      "epoch": 1.2388549976536838,
+      "grad_norm": 3.617574453353882,
+      "learning_rate": 3.259052924791087e-05,
+      "loss": 0.7652,
+      "step": 660
+    },
+    {
+      "epoch": 1.2576255279211637,
+      "grad_norm": 3.621431589126587,
+      "learning_rate": 3.224233983286909e-05,
+      "loss": 0.7634,
+      "step": 670
+    },
+    {
+      "epoch": 1.2763960581886438,
+      "grad_norm": 3.5540874004364014,
+      "learning_rate": 3.18941504178273e-05,
+      "loss": 0.7951,
+      "step": 680
+    },
+    {
+      "epoch": 1.2951665884561239,
+      "grad_norm": 3.1876299381256104,
+      "learning_rate": 3.154596100278552e-05,
+      "loss": 0.787,
+      "step": 690
+    },
+    {
+      "epoch": 1.313937118723604,
+      "grad_norm": 3.8441293239593506,
+      "learning_rate": 3.1197771587743737e-05,
+      "loss": 0.7799,
+      "step": 700
+    },
+    {
+      "epoch": 1.332707648991084,
+      "grad_norm": 3.2278223037719727,
+      "learning_rate": 3.0849582172701955e-05,
+      "loss": 0.845,
+      "step": 710
+    },
+    {
+      "epoch": 1.3514781792585642,
+      "grad_norm": 3.1881773471832275,
+      "learning_rate": 3.050139275766017e-05,
+      "loss": 0.7415,
+      "step": 720
+    },
+    {
+      "epoch": 1.370248709526044,
+      "grad_norm": 3.34291672706604,
+      "learning_rate": 3.0153203342618386e-05,
+      "loss": 0.7424,
+      "step": 730
+    },
+    {
+      "epoch": 1.3890192397935242,
+      "grad_norm": 5.416168689727783,
+      "learning_rate": 2.98050139275766e-05,
+      "loss": 0.8025,
+      "step": 740
+    },
+    {
+      "epoch": 1.4077897700610043,
+      "grad_norm": 3.111943244934082,
+      "learning_rate": 2.945682451253482e-05,
+      "loss": 0.8537,
+      "step": 750
+    },
+    {
+      "epoch": 1.4265603003284844,
+      "grad_norm": 3.118394374847412,
+      "learning_rate": 2.9108635097493035e-05,
+      "loss": 0.7796,
+      "step": 760
+    },
+    {
+      "epoch": 1.4453308305959642,
+      "grad_norm": 3.8577425479888916,
+      "learning_rate": 2.8760445682451254e-05,
+      "loss": 0.7926,
+      "step": 770
+    },
+    {
+      "epoch": 1.4641013608634443,
+      "grad_norm": 4.177425384521484,
+      "learning_rate": 2.841225626740947e-05,
+      "loss": 0.7863,
+      "step": 780
+    },
+    {
+      "epoch": 1.4828718911309244,
+      "grad_norm": 3.465301275253296,
+      "learning_rate": 2.8064066852367688e-05,
+      "loss": 0.7687,
+      "step": 790
+    },
+    {
+      "epoch": 1.5016424213984045,
+      "grad_norm": 3.2810630798339844,
+      "learning_rate": 2.7715877437325903e-05,
+      "loss": 0.8167,
+      "step": 800
+    },
+    {
+      "epoch": 1.5204129516658846,
+      "grad_norm": 3.05501127243042,
+      "learning_rate": 2.736768802228412e-05,
+      "loss": 0.7299,
+      "step": 810
+    },
+    {
+      "epoch": 1.5391834819333647,
+      "grad_norm": 2.9829261302948,
+      "learning_rate": 2.7019498607242337e-05,
+      "loss": 0.7677,
+      "step": 820
+    },
+    {
+      "epoch": 1.5579540122008448,
+      "grad_norm": 3.561396360397339,
+      "learning_rate": 2.6671309192200562e-05,
+      "loss": 0.7813,
+      "step": 830
+    },
+    {
+      "epoch": 1.5767245424683247,
+      "grad_norm": 2.8982436656951904,
+      "learning_rate": 2.6323119777158778e-05,
+      "loss": 0.731,
+      "step": 840
+    },
+    {
+      "epoch": 1.5954950727358048,
+      "grad_norm": 3.406423330307007,
+      "learning_rate": 2.5974930362116996e-05,
+      "loss": 0.73,
+      "step": 850
+    },
+    {
+      "epoch": 1.6142656030032847,
+      "grad_norm": 3.0049679279327393,
+      "learning_rate": 2.562674094707521e-05,
+      "loss": 0.7826,
+      "step": 860
+    },
+    {
+      "epoch": 1.6330361332707648,
+      "grad_norm": 3.500458002090454,
+      "learning_rate": 2.527855153203343e-05,
+      "loss": 0.7499,
+      "step": 870
+    },
+    {
+      "epoch": 1.651806663538245,
+      "grad_norm": 3.0891637802124023,
+      "learning_rate": 2.4930362116991646e-05,
+      "loss": 0.7587,
+      "step": 880
+    },
+    {
+      "epoch": 1.670577193805725,
+      "grad_norm": 3.149742841720581,
+      "learning_rate": 2.4582172701949864e-05,
+      "loss": 0.7374,
+      "step": 890
+    },
+    {
+      "epoch": 1.689347724073205,
+      "grad_norm": 3.1518661975860596,
+      "learning_rate": 2.423398328690808e-05,
+      "loss": 0.7893,
+      "step": 900
+    },
+    {
+      "epoch": 1.7081182543406852,
+      "grad_norm": 3.08024525642395,
+      "learning_rate": 2.3885793871866298e-05,
+      "loss": 0.7853,
+      "step": 910
+    },
+    {
+      "epoch": 1.7268887846081653,
+      "grad_norm": 3.158390522003174,
+      "learning_rate": 2.3537604456824514e-05,
+      "loss": 0.7694,
+      "step": 920
+    },
+    {
+      "epoch": 1.7456593148756452,
+      "grad_norm": 3.0718069076538086,
+      "learning_rate": 2.318941504178273e-05,
+      "loss": 0.7843,
+      "step": 930
+    },
+    {
+      "epoch": 1.7644298451431253,
+      "grad_norm": 3.4767684936523438,
+      "learning_rate": 2.2841225626740948e-05,
+      "loss": 0.7483,
+      "step": 940
+    },
+    {
+      "epoch": 1.7832003754106054,
+      "grad_norm": 3.350541353225708,
+      "learning_rate": 2.2493036211699163e-05,
+      "loss": 0.7457,
+      "step": 950
+    },
+    {
+      "epoch": 1.8019709056780853,
+      "grad_norm": 3.4785940647125244,
+      "learning_rate": 2.214484679665738e-05,
+      "loss": 0.7586,
+      "step": 960
+    },
+    {
+      "epoch": 1.8207414359455654,
+      "grad_norm": 3.245809316635132,
+      "learning_rate": 2.17966573816156e-05,
+      "loss": 0.7035,
+      "step": 970
+    },
+    {
+      "epoch": 1.8395119662130455,
+      "grad_norm": 3.292048692703247,
+      "learning_rate": 2.144846796657382e-05,
+      "loss": 0.7366,
+      "step": 980
+    },
+    {
+      "epoch": 1.8582824964805256,
+      "grad_norm": 4.539200305938721,
+      "learning_rate": 2.1100278551532034e-05,
+      "loss": 0.7377,
+      "step": 990
+    },
+    {
+      "epoch": 1.8770530267480057,
+      "grad_norm": 2.6051247119903564,
+      "learning_rate": 2.0752089136490253e-05,
+      "loss": 0.8442,
+      "step": 1000
+    },
+    {
+      "epoch": 1.8958235570154858,
+      "grad_norm": 3.7407584190368652,
+      "learning_rate": 2.0403899721448468e-05,
+      "loss": 0.732,
+      "step": 1010
+    },
+    {
+      "epoch": 1.9145940872829659,
+      "grad_norm": 3.855624198913574,
+      "learning_rate": 2.0055710306406687e-05,
+      "loss": 0.8147,
+      "step": 1020
+    },
+    {
+      "epoch": 1.9333646175504458,
+      "grad_norm": 3.7936253547668457,
+      "learning_rate": 1.9707520891364902e-05,
+      "loss": 0.6852,
+      "step": 1030
+    },
+    {
+      "epoch": 1.9521351478179259,
+      "grad_norm": 3.4030821323394775,
+      "learning_rate": 1.935933147632312e-05,
+      "loss": 0.7311,
+      "step": 1040
+    },
+    {
+      "epoch": 1.970905678085406,
+      "grad_norm": 3.5526273250579834,
+      "learning_rate": 1.9011142061281336e-05,
+      "loss": 0.7846,
+      "step": 1050
+    },
+    {
+      "epoch": 1.9896762083528858,
+      "grad_norm": 3.1862409114837646,
+      "learning_rate": 1.8662952646239558e-05,
+      "loss": 0.7233,
+      "step": 1060
+    },
+    {
+      "epoch": 1.999061473486626,
+      "eval_accuracy": 0.8294389438943894,
+      "eval_loss": 0.6780887842178345,
+      "eval_runtime": 125.2435,
+      "eval_samples_per_second": 60.482,
+      "eval_steps_per_second": 1.892,
+      "step": 1065
+    },
+    {
+      "epoch": 2.008446738620366,
+      "grad_norm": 3.521763563156128,
+      "learning_rate": 1.8314763231197773e-05,
+      "loss": 0.6907,
+      "step": 1070
+    },
+    {
+      "epoch": 2.027217268887846,
+      "grad_norm": 3.0328750610351562,
+      "learning_rate": 1.7966573816155992e-05,
+      "loss": 0.6829,
+      "step": 1080
+    },
+    {
+      "epoch": 2.045987799155326,
+      "grad_norm": 4.274998188018799,
+      "learning_rate": 1.7618384401114207e-05,
+      "loss": 0.635,
+      "step": 1090
+    },
+    {
+      "epoch": 2.0647583294228062,
+      "grad_norm": 3.0744423866271973,
+      "learning_rate": 1.7270194986072426e-05,
+      "loss": 0.7163,
+      "step": 1100
+    },
+    {
+      "epoch": 2.0835288596902863,
+      "grad_norm": 3.0927815437316895,
+      "learning_rate": 1.692200557103064e-05,
+      "loss": 0.6817,
+      "step": 1110
+    },
+    {
+      "epoch": 2.1022993899577664,
+      "grad_norm": 2.5418238639831543,
+      "learning_rate": 1.6573816155988857e-05,
+      "loss": 0.6235,
+      "step": 1120
+    },
+    {
+      "epoch": 2.1210699202252465,
+      "grad_norm": 3.1327621936798096,
+      "learning_rate": 1.6225626740947075e-05,
+      "loss": 0.6351,
+      "step": 1130
+    },
+    {
+      "epoch": 2.139840450492726,
+      "grad_norm": 4.186623573303223,
+      "learning_rate": 1.587743732590529e-05,
+      "loss": 0.6383,
+      "step": 1140
+    },
+    {
+      "epoch": 2.1586109807602063,
+      "grad_norm": 3.007977247238159,
+      "learning_rate": 1.552924791086351e-05,
+      "loss": 0.6994,
+      "step": 1150
+    },
+    {
+      "epoch": 2.1773815110276864,
+      "grad_norm": 3.232109308242798,
+      "learning_rate": 1.518105849582173e-05,
+      "loss": 0.6615,
+      "step": 1160
+    },
+    {
+      "epoch": 2.1961520412951665,
+      "grad_norm": 2.8354580402374268,
+      "learning_rate": 1.4832869080779947e-05,
+      "loss": 0.6686,
+      "step": 1170
+    },
+    {
+      "epoch": 2.2149225715626466,
+      "grad_norm": 3.6194567680358887,
+      "learning_rate": 1.4484679665738164e-05,
+      "loss": 0.6649,
+      "step": 1180
+    },
+    {
+      "epoch": 2.2336931018301267,
+      "grad_norm": 4.279024600982666,
+      "learning_rate": 1.413649025069638e-05,
+      "loss": 0.6529,
+      "step": 1190
+    },
+    {
+      "epoch": 2.252463632097607,
+      "grad_norm": 3.035931348800659,
+      "learning_rate": 1.3788300835654596e-05,
+      "loss": 0.6796,
+      "step": 1200
+    },
+    {
+      "epoch": 2.271234162365087,
+      "grad_norm": 3.2257440090179443,
+      "learning_rate": 1.3440111420612813e-05,
+      "loss": 0.6501,
+      "step": 1210
+    },
+    {
+      "epoch": 2.290004692632567,
+      "grad_norm": 3.176237106323242,
+      "learning_rate": 1.309192200557103e-05,
+      "loss": 0.7252,
+      "step": 1220
+    },
+    {
+      "epoch": 2.308775222900047,
+      "grad_norm": 3.575956344604492,
+      "learning_rate": 1.2743732590529247e-05,
+      "loss": 0.6798,
+      "step": 1230
+    },
+    {
+      "epoch": 2.327545753167527,
+      "grad_norm": 3.1033012866973877,
+      "learning_rate": 1.2395543175487466e-05,
+      "loss": 0.6291,
+      "step": 1240
+    },
+    {
+      "epoch": 2.346316283435007,
+      "grad_norm": 3.797513961791992,
+      "learning_rate": 1.2047353760445683e-05,
+      "loss": 0.6908,
+      "step": 1250
+    },
+    {
+      "epoch": 2.365086813702487,
+      "grad_norm": 3.399435043334961,
+      "learning_rate": 1.16991643454039e-05,
+      "loss": 0.5915,
+      "step": 1260
+    },
+    {
+      "epoch": 2.383857343969967,
+      "grad_norm": 2.9869837760925293,
+      "learning_rate": 1.1350974930362116e-05,
+      "loss": 0.6665,
+      "step": 1270
+    },
+    {
+      "epoch": 2.402627874237447,
+      "grad_norm": 3.171600580215454,
+      "learning_rate": 1.1002785515320335e-05,
+      "loss": 0.6202,
+      "step": 1280
+    },
+    {
+      "epoch": 2.4213984045049273,
+      "grad_norm": 2.0792176723480225,
+      "learning_rate": 1.0654596100278552e-05,
+      "loss": 0.5773,
+      "step": 1290
+    },
+    {
+      "epoch": 2.4401689347724074,
+      "grad_norm": 3.339883804321289,
+      "learning_rate": 1.0306406685236769e-05,
+      "loss": 0.6435,
+      "step": 1300
+    },
+    {
+      "epoch": 2.4589394650398875,
+      "grad_norm": 3.2008817195892334,
+      "learning_rate": 9.958217270194986e-06,
+      "loss": 0.6448,
+      "step": 1310
+    },
+    {
+      "epoch": 2.4777099953073676,
+      "grad_norm": 2.828279733657837,
+      "learning_rate": 9.610027855153205e-06,
+      "loss": 0.6121,
+      "step": 1320
+    },
+    {
+      "epoch": 2.4964805255748477,
+      "grad_norm": 2.920414924621582,
+      "learning_rate": 9.261838440111422e-06,
+      "loss": 0.6569,
+      "step": 1330
+    },
+    {
+      "epoch": 2.5152510558423273,
+      "grad_norm": 3.4083826541900635,
+      "learning_rate": 8.913649025069639e-06,
+      "loss": 0.6819,
+      "step": 1340
+    },
+    {
+      "epoch": 2.534021586109808,
+      "grad_norm": 2.8921279907226562,
+      "learning_rate": 8.565459610027856e-06,
+      "loss": 0.6257,
+      "step": 1350
+    },
+    {
+      "epoch": 2.5527921163772875,
+      "grad_norm": 3.33734130859375,
+      "learning_rate": 8.217270194986073e-06,
+      "loss": 0.6439,
+      "step": 1360
+    },
+    {
+      "epoch": 2.5715626466447676,
+      "grad_norm": 4.184520244598389,
+      "learning_rate": 7.869080779944291e-06,
+      "loss": 0.6742,
+      "step": 1370
+    },
+    {
+      "epoch": 2.5903331769122477,
+      "grad_norm": 3.5494306087493896,
+      "learning_rate": 7.5208913649025075e-06,
+      "loss": 0.6485,
+      "step": 1380
+    },
+    {
+      "epoch": 2.609103707179728,
+      "grad_norm": 2.791755437850952,
+      "learning_rate": 7.1727019498607245e-06,
+      "loss": 0.6061,
+      "step": 1390
+    },
+    {
+      "epoch": 2.627874237447208,
+      "grad_norm": 3.616508960723877,
+      "learning_rate": 6.8245125348189415e-06,
+      "loss": 0.7124,
+      "step": 1400
+    },
+    {
+      "epoch": 2.646644767714688,
+      "grad_norm": 3.5052719116210938,
+      "learning_rate": 6.4763231197771585e-06,
+      "loss": 0.6545,
+      "step": 1410
+    },
+    {
+      "epoch": 2.665415297982168,
+      "grad_norm": 3.463571786880493,
+      "learning_rate": 6.128133704735376e-06,
+      "loss": 0.5905,
+      "step": 1420
+    },
+    {
+      "epoch": 2.684185828249648,
+      "grad_norm": 3.2912559509277344,
+      "learning_rate": 5.779944289693594e-06,
+      "loss": 0.5468,
+      "step": 1430
+    },
+    {
+      "epoch": 2.7029563585171283,
+      "grad_norm": 3.0410854816436768,
+      "learning_rate": 5.43175487465181e-06,
+      "loss": 0.6286,
+      "step": 1440
+    },
+    {
+      "epoch": 2.721726888784608,
+      "grad_norm": 3.565953016281128,
+      "learning_rate": 5.083565459610028e-06,
+      "loss": 0.6567,
+      "step": 1450
+    },
+    {
+      "epoch": 2.740497419052088,
+      "grad_norm": 3.0948688983917236,
+      "learning_rate": 4.735376044568245e-06,
+      "loss": 0.644,
+      "step": 1460
+    },
+    {
+      "epoch": 2.759267949319568,
+      "grad_norm": 3.734225034713745,
+      "learning_rate": 4.387186629526462e-06,
+      "loss": 0.65,
+      "step": 1470
+    },
+    {
+      "epoch": 2.7780384795870483,
+      "grad_norm": 3.245823860168457,
+      "learning_rate": 4.03899721448468e-06,
+      "loss": 0.613,
+      "step": 1480
+    },
+    {
+      "epoch": 2.7968090098545284,
+      "grad_norm": 2.6476266384124756,
+      "learning_rate": 3.690807799442897e-06,
+      "loss": 0.6562,
+      "step": 1490
+    },
+    {
+      "epoch": 2.8155795401220085,
+      "grad_norm": 3.1951568126678467,
+      "learning_rate": 3.3426183844011147e-06,
+      "loss": 0.5897,
+      "step": 1500
+    },
+    {
+      "epoch": 2.8343500703894886,
+      "grad_norm": 3.754561424255371,
+      "learning_rate": 2.9944289693593313e-06,
+      "loss": 0.5686,
+      "step": 1510
+    },
+    {
+      "epoch": 2.8531206006569687,
+      "grad_norm": 2.6758432388305664,
+      "learning_rate": 2.6462395543175487e-06,
+      "loss": 0.6791,
+      "step": 1520
+    },
+    {
+      "epoch": 2.871891130924449,
+      "grad_norm": 2.6981005668640137,
+      "learning_rate": 2.298050139275766e-06,
+      "loss": 0.5544,
+      "step": 1530
+    },
+    {
+      "epoch": 2.8906616611919285,
+      "grad_norm": 3.503272294998169,
+      "learning_rate": 1.9498607242339835e-06,
+      "loss": 0.6921,
+      "step": 1540
+    },
+    {
+      "epoch": 2.9094321914594086,
+      "grad_norm": 3.2283263206481934,
+      "learning_rate": 1.6016713091922007e-06,
+      "loss": 0.6307,
+      "step": 1550
+    },
+    {
+      "epoch": 2.9282027217268887,
+      "grad_norm": 3.2601311206817627,
+      "learning_rate": 1.253481894150418e-06,
+      "loss": 0.653,
+      "step": 1560
+    },
+    {
+      "epoch": 2.9469732519943688,
+      "grad_norm": 3.5451338291168213,
+      "learning_rate": 9.052924791086352e-07,
+      "loss": 0.5953,
+      "step": 1570
+    },
+    {
+      "epoch": 2.965743782261849,
+      "grad_norm": 3.605889081954956,
+      "learning_rate": 5.571030640668524e-07,
+      "loss": 0.6338,
+      "step": 1580
+    },
+    {
+      "epoch": 2.984514312529329,
+      "grad_norm": 4.055668830871582,
+      "learning_rate": 2.0891364902506967e-07,
+      "loss": 0.6047,
+      "step": 1590
+    },
+    {
+      "epoch": 2.995776630689817,
+      "eval_accuracy": 0.834983498349835,
+      "eval_loss": 0.6400949358940125,
+      "eval_runtime": 126.916,
+      "eval_samples_per_second": 59.685,
+      "eval_steps_per_second": 1.867,
+      "step": 1596
+    },
+    {
+      "epoch": 2.995776630689817,
+      "step": 1596,
+      "total_flos": 1.584209316447959e+19,
+      "train_loss": 1.1699181137825911,
+      "train_runtime": 8274.5103,
+      "train_samples_per_second": 24.717,
+      "train_steps_per_second": 0.193
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 1596,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.584209316447959e+19,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": null
+}