ulisesbravo commited on Oct 31

Commit

7655b85

•

1 Parent(s): a6e3062

Upload folder using huggingface_hub

Browse files

Files changed (18) hide show

README.md +36 -0
checkpoint-8454/config.json +48 -0
checkpoint-8454/model.safetensors +3 -0
checkpoint-8454/optimizer.pt +3 -0
checkpoint-8454/rng_state.pth +3 -0
checkpoint-8454/scheduler.pt +3 -0
checkpoint-8454/trainer_state.json +2462 -0
checkpoint-8454/training_args.bin +3 -0
config.json +48 -0
model.safetensors +3 -0
runs/Oct30_19-52-50_r-ulisesbravo-treino-teste-boqirknt-9f37e-oreva/events.out.tfevents.1730317972.r-ulisesbravo-treino-teste-boqirknt-9f37e-oreva.112.0 +2 -2
runs/Oct30_19-52-50_r-ulisesbravo-treino-teste-boqirknt-9f37e-oreva/events.out.tfevents.1730377445.r-ulisesbravo-treino-teste-boqirknt-9f37e-oreva.112.1 +3 -0
special_tokens_map.json +7 -0
tokenizer.json +0 -0
tokenizer_config.json +57 -0
training_args.bin +3 -0
training_params.json +30 -0
vocab.txt +0 -0

README.md ADDED Viewed

	@@ -0,0 +1,36 @@

+---
+tags:
+- autotrain
+- text-classification
+base_model: neuralmind/bert-base-portuguese-cased
+widget:
+- text: "I love AutoTrain"
+---
+# Model Trained Using AutoTrain
+- Problem type: Text Classification
+## Validation Metrics
+loss: 0.7668559551239014
+f1_macro: 0.6934027713968594
+f1_micro: 0.6885536823425022
+f1_weighted: 0.6903326985699842
+precision_macro: 0.7104396368026238
+precision_micro: 0.6885536823425022
+precision_weighted: 0.705612321743312
+recall_macro: 0.6898371166175705
+recall_micro: 0.6885536823425022
+recall_weighted: 0.6885536823425022
+accuracy: 0.6885536823425022

checkpoint-8454/config.json ADDED Viewed

	@@ -0,0 +1,48 @@

+{
+  "_name_or_path": "neuralmind/bert-base-portuguese-cased",
+  "_num_labels": 5,
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "directionality": "bidi",
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "1 - INTIMA\u00c7\u00c3O INICIAL",
+    "1": "2 - INTIMA\u00c7\u00c3O INSTRU\u00c7\u00c3O",
+    "2": "3 - INTIMA\u00c7\u00c3O RECURSAL",
+    "3": "4 - INTIMA\u00c7\u00c3O CUMPRIMENTO DE SENTEN\u00c7A",
+    "4": "5 - INTIMA\u00c7\u00c3O TR\u00c2NSITO"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "1 - INTIMA\u00c7\u00c3O INICIAL": 0,
+    "2 - INTIMA\u00c7\u00c3O INSTRU\u00c7\u00c3O": 1,
+    "3 - INTIMA\u00c7\u00c3O RECURSAL": 2,
+    "4 - INTIMA\u00c7\u00c3O CUMPRIMENTO DE SENTEN\u00c7A": 3,
+    "5 - INTIMA\u00c7\u00c3O TR\u00c2NSITO": 4
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "output_past": true,
+  "pad_token_id": 0,
+  "pooler_fc_size": 768,
+  "pooler_num_attention_heads": 12,
+  "pooler_num_fc_layers": 3,
+  "pooler_size_per_head": 128,
+  "pooler_type": "first_token_transform",
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.45.0",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 29794
+}

checkpoint-8454/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b7b5aea858f25e24827711cfbc02c35b160928b6c19e405df1c24d90f5be4a7d
+size 435731452

checkpoint-8454/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c8500747b1f542fd7722e9ad75be164317058c0f43f1f6ed49a98609035bade2
+size 871578362

checkpoint-8454/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e9dfee3efe76126be1b59aa57fecfecdae10b2991270f6288b7b3669660418ce
+size 13990

checkpoint-8454/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:892595011cd8f92081483aabb2f75366581579d7002ddf5c707401b4c494b888
+size 1064

checkpoint-8454/trainer_state.json ADDED Viewed

	@@ -0,0 +1,2462 @@

+{
+  "best_metric": 0.7668559551239014,
+  "best_model_checkpoint": "autotrain-k9ag8-b7nm0/checkpoint-8454",
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 8454,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.008871540099361249,
+      "grad_norm": 7.602209568023682,
+      "learning_rate": 1.4775413711583925e-06,
+      "loss": 1.6215,
+      "step": 25
+    },
+    {
+      "epoch": 0.017743080198722498,
+      "grad_norm": 10.643656730651855,
+      "learning_rate": 2.955082742316785e-06,
+      "loss": 1.6427,
+      "step": 50
+    },
+    {
+      "epoch": 0.026614620298083747,
+      "grad_norm": 7.053741455078125,
+      "learning_rate": 4.432624113475177e-06,
+      "loss": 1.614,
+      "step": 75
+    },
+    {
+      "epoch": 0.035486160397444996,
+      "grad_norm": 6.636758804321289,
+      "learning_rate": 5.91016548463357e-06,
+      "loss": 1.5967,
+      "step": 100
+    },
+    {
+      "epoch": 0.04435770049680625,
+      "grad_norm": 6.755153656005859,
+      "learning_rate": 7.387706855791962e-06,
+      "loss": 1.596,
+      "step": 125
+    },
+    {
+      "epoch": 0.053229240596167494,
+      "grad_norm": 6.6177239418029785,
+      "learning_rate": 8.865248226950355e-06,
+      "loss": 1.564,
+      "step": 150
+    },
+    {
+      "epoch": 0.06210078069552875,
+      "grad_norm": 4.62630558013916,
+      "learning_rate": 1.0342789598108746e-05,
+      "loss": 1.5625,
+      "step": 175
+    },
+    {
+      "epoch": 0.07097232079488999,
+      "grad_norm": 5.605529308319092,
+      "learning_rate": 1.182033096926714e-05,
+      "loss": 1.5312,
+      "step": 200
+    },
+    {
+      "epoch": 0.07984386089425125,
+      "grad_norm": 9.841400146484375,
+      "learning_rate": 1.3297872340425532e-05,
+      "loss": 1.4957,
+      "step": 225
+    },
+    {
+      "epoch": 0.0887154009936125,
+      "grad_norm": 6.7550458908081055,
+      "learning_rate": 1.4775413711583924e-05,
+      "loss": 1.4394,
+      "step": 250
+    },
+    {
+      "epoch": 0.09758694109297374,
+      "grad_norm": 8.143739700317383,
+      "learning_rate": 1.6252955082742317e-05,
+      "loss": 1.298,
+      "step": 275
+    },
+    {
+      "epoch": 0.10645848119233499,
+      "grad_norm": 8.003219604492188,
+      "learning_rate": 1.773049645390071e-05,
+      "loss": 1.2673,
+      "step": 300
+    },
+    {
+      "epoch": 0.11533002129169624,
+      "grad_norm": 11.118531227111816,
+      "learning_rate": 1.92080378250591e-05,
+      "loss": 1.3083,
+      "step": 325
+    },
+    {
+      "epoch": 0.1242015613910575,
+      "grad_norm": 12.492416381835938,
+      "learning_rate": 2.0685579196217493e-05,
+      "loss": 1.2445,
+      "step": 350
+    },
+    {
+      "epoch": 0.13307310149041873,
+      "grad_norm": 4.358503818511963,
+      "learning_rate": 2.2163120567375885e-05,
+      "loss": 1.146,
+      "step": 375
+    },
+    {
+      "epoch": 0.14194464158977999,
+      "grad_norm": 7.787415504455566,
+      "learning_rate": 2.364066193853428e-05,
+      "loss": 1.3067,
+      "step": 400
+    },
+    {
+      "epoch": 0.15081618168914124,
+      "grad_norm": 5.6704912185668945,
+      "learning_rate": 2.5118203309692672e-05,
+      "loss": 1.2465,
+      "step": 425
+    },
+    {
+      "epoch": 0.1596877217885025,
+      "grad_norm": 6.449095249176025,
+      "learning_rate": 2.6595744680851064e-05,
+      "loss": 1.144,
+      "step": 450
+    },
+    {
+      "epoch": 0.16855926188786374,
+      "grad_norm": 10.410565376281738,
+      "learning_rate": 2.8073286052009455e-05,
+      "loss": 1.2432,
+      "step": 475
+    },
+    {
+      "epoch": 0.177430801987225,
+      "grad_norm": 11.332856178283691,
+      "learning_rate": 2.9550827423167847e-05,
+      "loss": 1.1153,
+      "step": 500
+    },
+    {
+      "epoch": 0.18630234208658622,
+      "grad_norm": 5.967296600341797,
+      "learning_rate": 3.102836879432624e-05,
+      "loss": 1.0873,
+      "step": 525
+    },
+    {
+      "epoch": 0.19517388218594747,
+      "grad_norm": 8.682112693786621,
+      "learning_rate": 3.2505910165484634e-05,
+      "loss": 1.1949,
+      "step": 550
+    },
+    {
+      "epoch": 0.20404542228530873,
+      "grad_norm": 9.76791763305664,
+      "learning_rate": 3.3983451536643026e-05,
+      "loss": 1.2161,
+      "step": 575
+    },
+    {
+      "epoch": 0.21291696238466998,
+      "grad_norm": 5.555416107177734,
+      "learning_rate": 3.546099290780142e-05,
+      "loss": 1.2722,
+      "step": 600
+    },
+    {
+      "epoch": 0.22178850248403123,
+      "grad_norm": 7.815185546875,
+      "learning_rate": 3.693853427895981e-05,
+      "loss": 1.0951,
+      "step": 625
+    },
+    {
+      "epoch": 0.23066004258339248,
+      "grad_norm": 6.9715895652771,
+      "learning_rate": 3.84160756501182e-05,
+      "loss": 1.1922,
+      "step": 650
+    },
+    {
+      "epoch": 0.23953158268275374,
+      "grad_norm": 5.526481628417969,
+      "learning_rate": 3.9893617021276594e-05,
+      "loss": 1.2998,
+      "step": 675
+    },
+    {
+      "epoch": 0.248403122782115,
+      "grad_norm": 10.200905799865723,
+      "learning_rate": 4.1371158392434986e-05,
+      "loss": 1.113,
+      "step": 700
+    },
+    {
+      "epoch": 0.2572746628814762,
+      "grad_norm": 8.1790771484375,
+      "learning_rate": 4.284869976359338e-05,
+      "loss": 1.0354,
+      "step": 725
+    },
+    {
+      "epoch": 0.26614620298083747,
+      "grad_norm": 12.212372779846191,
+      "learning_rate": 4.432624113475177e-05,
+      "loss": 1.1553,
+      "step": 750
+    },
+    {
+      "epoch": 0.2750177430801987,
+      "grad_norm": 7.486617565155029,
+      "learning_rate": 4.580378250591017e-05,
+      "loss": 1.0368,
+      "step": 775
+    },
+    {
+      "epoch": 0.28388928317955997,
+      "grad_norm": 5.094996929168701,
+      "learning_rate": 4.728132387706856e-05,
+      "loss": 1.0357,
+      "step": 800
+    },
+    {
+      "epoch": 0.2927608232789212,
+      "grad_norm": 7.159431457519531,
+      "learning_rate": 4.875886524822695e-05,
+      "loss": 1.0414,
+      "step": 825
+    },
+    {
+      "epoch": 0.3016323633782825,
+      "grad_norm": 7.34111213684082,
+      "learning_rate": 4.9973711882229235e-05,
+      "loss": 1.0777,
+      "step": 850
+    },
+    {
+      "epoch": 0.31050390347764373,
+      "grad_norm": 10.668549537658691,
+      "learning_rate": 4.980941114616194e-05,
+      "loss": 1.0795,
+      "step": 875
+    },
+    {
+      "epoch": 0.319375443577005,
+      "grad_norm": 10.225701332092285,
+      "learning_rate": 4.9645110410094644e-05,
+      "loss": 1.0581,
+      "step": 900
+    },
+    {
+      "epoch": 0.32824698367636623,
+      "grad_norm": 10.795150756835938,
+      "learning_rate": 4.948080967402734e-05,
+      "loss": 1.0562,
+      "step": 925
+    },
+    {
+      "epoch": 0.3371185237757275,
+      "grad_norm": 5.316555500030518,
+      "learning_rate": 4.931650893796004e-05,
+      "loss": 0.9869,
+      "step": 950
+    },
+    {
+      "epoch": 0.34599006387508874,
+      "grad_norm": 5.70430850982666,
+      "learning_rate": 4.9152208201892744e-05,
+      "loss": 1.0267,
+      "step": 975
+    },
+    {
+      "epoch": 0.35486160397445,
+      "grad_norm": 8.478789329528809,
+      "learning_rate": 4.898790746582545e-05,
+      "loss": 1.0044,
+      "step": 1000
+    },
+    {
+      "epoch": 0.3637331440738112,
+      "grad_norm": 8.099587440490723,
+      "learning_rate": 4.8823606729758154e-05,
+      "loss": 0.9652,
+      "step": 1025
+    },
+    {
+      "epoch": 0.37260468417317244,
+      "grad_norm": 7.185507774353027,
+      "learning_rate": 4.865930599369085e-05,
+      "loss": 0.979,
+      "step": 1050
+    },
+    {
+      "epoch": 0.3814762242725337,
+      "grad_norm": 5.971964359283447,
+      "learning_rate": 4.8495005257623556e-05,
+      "loss": 1.0463,
+      "step": 1075
+    },
+    {
+      "epoch": 0.39034776437189495,
+      "grad_norm": 7.249493598937988,
+      "learning_rate": 4.833070452155626e-05,
+      "loss": 0.9667,
+      "step": 1100
+    },
+    {
+      "epoch": 0.3992193044712562,
+      "grad_norm": 5.658076763153076,
+      "learning_rate": 4.8166403785488965e-05,
+      "loss": 1.1338,
+      "step": 1125
+    },
+    {
+      "epoch": 0.40809084457061745,
+      "grad_norm": 9.11898136138916,
+      "learning_rate": 4.800210304942166e-05,
+      "loss": 1.0147,
+      "step": 1150
+    },
+    {
+      "epoch": 0.4169623846699787,
+      "grad_norm": 6.678395748138428,
+      "learning_rate": 4.783780231335437e-05,
+      "loss": 0.8906,
+      "step": 1175
+    },
+    {
+      "epoch": 0.42583392476933996,
+      "grad_norm": 6.076191425323486,
+      "learning_rate": 4.7673501577287065e-05,
+      "loss": 1.0508,
+      "step": 1200
+    },
+    {
+      "epoch": 0.4347054648687012,
+      "grad_norm": 4.180179119110107,
+      "learning_rate": 4.750920084121977e-05,
+      "loss": 0.9826,
+      "step": 1225
+    },
+    {
+      "epoch": 0.44357700496806246,
+      "grad_norm": 7.360926628112793,
+      "learning_rate": 4.7344900105152474e-05,
+      "loss": 0.9848,
+      "step": 1250
+    },
+    {
+      "epoch": 0.4524485450674237,
+      "grad_norm": 4.815805435180664,
+      "learning_rate": 4.718059936908518e-05,
+      "loss": 0.9799,
+      "step": 1275
+    },
+    {
+      "epoch": 0.46132008516678497,
+      "grad_norm": 6.276717662811279,
+      "learning_rate": 4.701629863301788e-05,
+      "loss": 1.0078,
+      "step": 1300
+    },
+    {
+      "epoch": 0.4701916252661462,
+      "grad_norm": 6.218778610229492,
+      "learning_rate": 4.685199789695058e-05,
+      "loss": 1.0242,
+      "step": 1325
+    },
+    {
+      "epoch": 0.47906316536550747,
+      "grad_norm": 7.256498336791992,
+      "learning_rate": 4.668769716088328e-05,
+      "loss": 0.8731,
+      "step": 1350
+    },
+    {
+      "epoch": 0.4879347054648687,
+      "grad_norm": 5.647130966186523,
+      "learning_rate": 4.652339642481598e-05,
+      "loss": 0.9373,
+      "step": 1375
+    },
+    {
+      "epoch": 0.49680624556423,
+      "grad_norm": 8.66212272644043,
+      "learning_rate": 4.635909568874869e-05,
+      "loss": 0.9476,
+      "step": 1400
+    },
+    {
+      "epoch": 0.5056777856635912,
+      "grad_norm": 5.111779689788818,
+      "learning_rate": 4.619479495268139e-05,
+      "loss": 0.9185,
+      "step": 1425
+    },
+    {
+      "epoch": 0.5145493257629524,
+      "grad_norm": 4.9285502433776855,
+      "learning_rate": 4.6030494216614097e-05,
+      "loss": 1.0187,
+      "step": 1450
+    },
+    {
+      "epoch": 0.5234208658623137,
+      "grad_norm": 7.633114814758301,
+      "learning_rate": 4.5866193480546794e-05,
+      "loss": 0.9257,
+      "step": 1475
+    },
+    {
+      "epoch": 0.5322924059616749,
+      "grad_norm": 5.585442543029785,
+      "learning_rate": 4.57018927444795e-05,
+      "loss": 1.0399,
+      "step": 1500
+    },
+    {
+      "epoch": 0.5411639460610362,
+      "grad_norm": 4.591919898986816,
+      "learning_rate": 4.5537592008412197e-05,
+      "loss": 0.9151,
+      "step": 1525
+    },
+    {
+      "epoch": 0.5500354861603974,
+      "grad_norm": 6.754642486572266,
+      "learning_rate": 4.53732912723449e-05,
+      "loss": 1.0153,
+      "step": 1550
+    },
+    {
+      "epoch": 0.5589070262597587,
+      "grad_norm": 6.780356407165527,
+      "learning_rate": 4.5208990536277606e-05,
+      "loss": 0.9998,
+      "step": 1575
+    },
+    {
+      "epoch": 0.5677785663591199,
+      "grad_norm": 7.279979228973389,
+      "learning_rate": 4.50446898002103e-05,
+      "loss": 0.998,
+      "step": 1600
+    },
+    {
+      "epoch": 0.5766501064584811,
+      "grad_norm": 18.642566680908203,
+      "learning_rate": 4.488038906414301e-05,
+      "loss": 0.8556,
+      "step": 1625
+    },
+    {
+      "epoch": 0.5855216465578424,
+      "grad_norm": 5.179675579071045,
+      "learning_rate": 4.471608832807571e-05,
+      "loss": 0.9329,
+      "step": 1650
+    },
+    {
+      "epoch": 0.5943931866572036,
+      "grad_norm": 11.334458351135254,
+      "learning_rate": 4.455178759200842e-05,
+      "loss": 0.9082,
+      "step": 1675
+    },
+    {
+      "epoch": 0.603264726756565,
+      "grad_norm": 7.290953159332275,
+      "learning_rate": 4.438748685594112e-05,
+      "loss": 0.8716,
+      "step": 1700
+    },
+    {
+      "epoch": 0.6121362668559261,
+      "grad_norm": 3.186927556991577,
+      "learning_rate": 4.422318611987382e-05,
+      "loss": 0.9728,
+      "step": 1725
+    },
+    {
+      "epoch": 0.6210078069552875,
+      "grad_norm": 6.01223087310791,
+      "learning_rate": 4.405888538380652e-05,
+      "loss": 0.9188,
+      "step": 1750
+    },
+    {
+      "epoch": 0.6298793470546487,
+      "grad_norm": 7.13861608505249,
+      "learning_rate": 4.389458464773922e-05,
+      "loss": 0.907,
+      "step": 1775
+    },
+    {
+      "epoch": 0.63875088715401,
+      "grad_norm": 5.580389499664307,
+      "learning_rate": 4.3730283911671926e-05,
+      "loss": 1.0589,
+      "step": 1800
+    },
+    {
+      "epoch": 0.6476224272533712,
+      "grad_norm": 7.246824264526367,
+      "learning_rate": 4.356598317560463e-05,
+      "loss": 0.9974,
+      "step": 1825
+    },
+    {
+      "epoch": 0.6564939673527325,
+      "grad_norm": 4.785336017608643,
+      "learning_rate": 4.3401682439537335e-05,
+      "loss": 0.9484,
+      "step": 1850
+    },
+    {
+      "epoch": 0.6653655074520937,
+      "grad_norm": 6.392651081085205,
+      "learning_rate": 4.323738170347003e-05,
+      "loss": 1.0558,
+      "step": 1875
+    },
+    {
+      "epoch": 0.674237047551455,
+      "grad_norm": 5.654791831970215,
+      "learning_rate": 4.307308096740274e-05,
+      "loss": 0.904,
+      "step": 1900
+    },
+    {
+      "epoch": 0.6831085876508162,
+      "grad_norm": 8.35096263885498,
+      "learning_rate": 4.2908780231335435e-05,
+      "loss": 0.9147,
+      "step": 1925
+    },
+    {
+      "epoch": 0.6919801277501775,
+      "grad_norm": 6.575385570526123,
+      "learning_rate": 4.274447949526814e-05,
+      "loss": 0.9009,
+      "step": 1950
+    },
+    {
+      "epoch": 0.7008516678495387,
+      "grad_norm": 5.978839874267578,
+      "learning_rate": 4.2580178759200844e-05,
+      "loss": 0.9722,
+      "step": 1975
+    },
+    {
+      "epoch": 0.7097232079489,
+      "grad_norm": 5.449729919433594,
+      "learning_rate": 4.241587802313355e-05,
+      "loss": 0.8602,
+      "step": 2000
+    },
+    {
+      "epoch": 0.7185947480482612,
+      "grad_norm": 4.398986339569092,
+      "learning_rate": 4.2251577287066246e-05,
+      "loss": 0.9699,
+      "step": 2025
+    },
+    {
+      "epoch": 0.7274662881476224,
+      "grad_norm": 7.566812515258789,
+      "learning_rate": 4.208727655099895e-05,
+      "loss": 0.8767,
+      "step": 2050
+    },
+    {
+      "epoch": 0.7363378282469837,
+      "grad_norm": 4.754166603088379,
+      "learning_rate": 4.1922975814931655e-05,
+      "loss": 0.9233,
+      "step": 2075
+    },
+    {
+      "epoch": 0.7452093683463449,
+      "grad_norm": 8.481706619262695,
+      "learning_rate": 4.175867507886436e-05,
+      "loss": 0.7744,
+      "step": 2100
+    },
+    {
+      "epoch": 0.7540809084457062,
+      "grad_norm": 5.274106502532959,
+      "learning_rate": 4.159437434279706e-05,
+      "loss": 0.9135,
+      "step": 2125
+    },
+    {
+      "epoch": 0.7629524485450674,
+      "grad_norm": 5.358260631561279,
+      "learning_rate": 4.1430073606729755e-05,
+      "loss": 0.8705,
+      "step": 2150
+    },
+    {
+      "epoch": 0.7718239886444287,
+      "grad_norm": 7.29016637802124,
+      "learning_rate": 4.126577287066246e-05,
+      "loss": 0.9884,
+      "step": 2175
+    },
+    {
+      "epoch": 0.7806955287437899,
+      "grad_norm": 4.230587005615234,
+      "learning_rate": 4.1101472134595165e-05,
+      "loss": 1.0909,
+      "step": 2200
+    },
+    {
+      "epoch": 0.7895670688431512,
+      "grad_norm": 3.3235785961151123,
+      "learning_rate": 4.093717139852787e-05,
+      "loss": 0.9675,
+      "step": 2225
+    },
+    {
+      "epoch": 0.7984386089425124,
+      "grad_norm": 6.612360954284668,
+      "learning_rate": 4.0772870662460574e-05,
+      "loss": 0.8865,
+      "step": 2250
+    },
+    {
+      "epoch": 0.8073101490418737,
+      "grad_norm": 3.8408925533294678,
+      "learning_rate": 4.060856992639327e-05,
+      "loss": 0.9141,
+      "step": 2275
+    },
+    {
+      "epoch": 0.8161816891412349,
+      "grad_norm": 8.467307090759277,
+      "learning_rate": 4.0444269190325976e-05,
+      "loss": 0.9242,
+      "step": 2300
+    },
+    {
+      "epoch": 0.8250532292405962,
+      "grad_norm": 6.088973522186279,
+      "learning_rate": 4.0279968454258674e-05,
+      "loss": 0.9406,
+      "step": 2325
+    },
+    {
+      "epoch": 0.8339247693399574,
+      "grad_norm": 5.104999542236328,
+      "learning_rate": 4.011566771819138e-05,
+      "loss": 0.9163,
+      "step": 2350
+    },
+    {
+      "epoch": 0.8427963094393187,
+      "grad_norm": 5.350615501403809,
+      "learning_rate": 3.995136698212408e-05,
+      "loss": 0.892,
+      "step": 2375
+    },
+    {
+      "epoch": 0.8516678495386799,
+      "grad_norm": 4.470179557800293,
+      "learning_rate": 3.978706624605679e-05,
+      "loss": 0.9243,
+      "step": 2400
+    },
+    {
+      "epoch": 0.8605393896380412,
+      "grad_norm": 7.627182483673096,
+      "learning_rate": 3.9622765509989485e-05,
+      "loss": 0.9249,
+      "step": 2425
+    },
+    {
+      "epoch": 0.8694109297374024,
+      "grad_norm": 3.69128155708313,
+      "learning_rate": 3.945846477392219e-05,
+      "loss": 0.9161,
+      "step": 2450
+    },
+    {
+      "epoch": 0.8782824698367636,
+      "grad_norm": 4.180213928222656,
+      "learning_rate": 3.9294164037854894e-05,
+      "loss": 0.8598,
+      "step": 2475
+    },
+    {
+      "epoch": 0.8871540099361249,
+      "grad_norm": 5.930824279785156,
+      "learning_rate": 3.912986330178759e-05,
+      "loss": 0.9908,
+      "step": 2500
+    },
+    {
+      "epoch": 0.8960255500354861,
+      "grad_norm": 5.566065311431885,
+      "learning_rate": 3.8965562565720296e-05,
+      "loss": 0.9378,
+      "step": 2525
+    },
+    {
+      "epoch": 0.9048970901348474,
+      "grad_norm": 5.347686767578125,
+      "learning_rate": 3.8801261829652994e-05,
+      "loss": 0.9627,
+      "step": 2550
+    },
+    {
+      "epoch": 0.9137686302342086,
+      "grad_norm": 4.1419525146484375,
+      "learning_rate": 3.86369610935857e-05,
+      "loss": 0.7985,
+      "step": 2575
+    },
+    {
+      "epoch": 0.9226401703335699,
+      "grad_norm": 4.296880722045898,
+      "learning_rate": 3.84726603575184e-05,
+      "loss": 0.8847,
+      "step": 2600
+    },
+    {
+      "epoch": 0.9315117104329311,
+      "grad_norm": 5.749390602111816,
+      "learning_rate": 3.830835962145111e-05,
+      "loss": 0.8881,
+      "step": 2625
+    },
+    {
+      "epoch": 0.9403832505322924,
+      "grad_norm": 4.67301607131958,
+      "learning_rate": 3.814405888538381e-05,
+      "loss": 0.9763,
+      "step": 2650
+    },
+    {
+      "epoch": 0.9492547906316536,
+      "grad_norm": 7.461897373199463,
+      "learning_rate": 3.7979758149316517e-05,
+      "loss": 0.9073,
+      "step": 2675
+    },
+    {
+      "epoch": 0.9581263307310149,
+      "grad_norm": 5.127830982208252,
+      "learning_rate": 3.7815457413249214e-05,
+      "loss": 0.8915,
+      "step": 2700
+    },
+    {
+      "epoch": 0.9669978708303761,
+      "grad_norm": 3.610241413116455,
+      "learning_rate": 3.765115667718191e-05,
+      "loss": 0.8039,
+      "step": 2725
+    },
+    {
+      "epoch": 0.9758694109297374,
+      "grad_norm": 7.237741470336914,
+      "learning_rate": 3.7486855941114617e-05,
+      "loss": 0.8631,
+      "step": 2750
+    },
+    {
+      "epoch": 0.9847409510290986,
+      "grad_norm": 5.400920867919922,
+      "learning_rate": 3.732255520504732e-05,
+      "loss": 0.9269,
+      "step": 2775
+    },
+    {
+      "epoch": 0.99361249112846,
+      "grad_norm": 7.000077247619629,
+      "learning_rate": 3.7158254468980026e-05,
+      "loss": 0.8502,
+      "step": 2800
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.6351375332741792,
+      "eval_f1_macro": 0.646122256511988,
+      "eval_f1_micro": 0.6351375332741792,
+      "eval_f1_weighted": 0.64322619559571,
+      "eval_loss": 0.8854864835739136,
+      "eval_precision_macro": 0.7018139832423136,
+      "eval_precision_micro": 0.6351375332741792,
+      "eval_precision_weighted": 0.6951112379231563,
+      "eval_recall_macro": 0.6347383469069486,
+      "eval_recall_micro": 0.6351375332741792,
+      "eval_recall_weighted": 0.6351375332741792,
+      "eval_runtime": 1148.734,
+      "eval_samples_per_second": 4.905,
+      "eval_steps_per_second": 0.307,
+      "step": 2818
+    },
+    {
+      "epoch": 1.0024840312278211,
+      "grad_norm": 2.3094370365142822,
+      "learning_rate": 3.6993953732912723e-05,
+      "loss": 0.7619,
+      "step": 2825
+    },
+    {
+      "epoch": 1.0113555713271825,
+      "grad_norm": 6.9726386070251465,
+      "learning_rate": 3.682965299684543e-05,
+      "loss": 0.8545,
+      "step": 2850
+    },
+    {
+      "epoch": 1.0202271114265435,
+      "grad_norm": 6.961376667022705,
+      "learning_rate": 3.666535226077813e-05,
+      "loss": 0.8677,
+      "step": 2875
+    },
+    {
+      "epoch": 1.0290986515259049,
+      "grad_norm": 5.254246234893799,
+      "learning_rate": 3.650105152471083e-05,
+      "loss": 0.9094,
+      "step": 2900
+    },
+    {
+      "epoch": 1.0379701916252662,
+      "grad_norm": 4.578902244567871,
+      "learning_rate": 3.6336750788643535e-05,
+      "loss": 0.8537,
+      "step": 2925
+    },
+    {
+      "epoch": 1.0468417317246275,
+      "grad_norm": 3.2812981605529785,
+      "learning_rate": 3.617245005257624e-05,
+      "loss": 0.9155,
+      "step": 2950
+    },
+    {
+      "epoch": 1.0557132718239886,
+      "grad_norm": 4.157433032989502,
+      "learning_rate": 3.600814931650894e-05,
+      "loss": 0.8686,
+      "step": 2975
+    },
+    {
+      "epoch": 1.0645848119233499,
+      "grad_norm": 8.107535362243652,
+      "learning_rate": 3.584384858044164e-05,
+      "loss": 0.8142,
+      "step": 3000
+    },
+    {
+      "epoch": 1.0734563520227112,
+      "grad_norm": 4.729946613311768,
+      "learning_rate": 3.5679547844374346e-05,
+      "loss": 0.8848,
+      "step": 3025
+    },
+    {
+      "epoch": 1.0823278921220725,
+      "grad_norm": 4.649665832519531,
+      "learning_rate": 3.551524710830705e-05,
+      "loss": 0.82,
+      "step": 3050
+    },
+    {
+      "epoch": 1.0911994322214336,
+      "grad_norm": 6.006737232208252,
+      "learning_rate": 3.535094637223975e-05,
+      "loss": 0.928,
+      "step": 3075
+    },
+    {
+      "epoch": 1.1000709723207949,
+      "grad_norm": 6.667446613311768,
+      "learning_rate": 3.518664563617245e-05,
+      "loss": 0.8371,
+      "step": 3100
+    },
+    {
+      "epoch": 1.1089425124201562,
+      "grad_norm": 5.0947418212890625,
+      "learning_rate": 3.502234490010515e-05,
+      "loss": 0.807,
+      "step": 3125
+    },
+    {
+      "epoch": 1.1178140525195175,
+      "grad_norm": 7.8628129959106445,
+      "learning_rate": 3.4858044164037855e-05,
+      "loss": 0.8762,
+      "step": 3150
+    },
+    {
+      "epoch": 1.1266855926188786,
+      "grad_norm": 7.366974830627441,
+      "learning_rate": 3.469374342797056e-05,
+      "loss": 0.9567,
+      "step": 3175
+    },
+    {
+      "epoch": 1.1355571327182399,
+      "grad_norm": 7.182910442352295,
+      "learning_rate": 3.4529442691903264e-05,
+      "loss": 0.8733,
+      "step": 3200
+    },
+    {
+      "epoch": 1.1444286728176012,
+      "grad_norm": 4.816949844360352,
+      "learning_rate": 3.436514195583596e-05,
+      "loss": 0.9105,
+      "step": 3225
+    },
+    {
+      "epoch": 1.1533002129169625,
+      "grad_norm": 5.3113813400268555,
+      "learning_rate": 3.4200841219768666e-05,
+      "loss": 0.9543,
+      "step": 3250
+    },
+    {
+      "epoch": 1.1621717530163236,
+      "grad_norm": 8.444610595703125,
+      "learning_rate": 3.403654048370137e-05,
+      "loss": 0.8729,
+      "step": 3275
+    },
+    {
+      "epoch": 1.171043293115685,
+      "grad_norm": 13.175244331359863,
+      "learning_rate": 3.387223974763407e-05,
+      "loss": 0.84,
+      "step": 3300
+    },
+    {
+      "epoch": 1.1799148332150462,
+      "grad_norm": 4.161907196044922,
+      "learning_rate": 3.370793901156677e-05,
+      "loss": 0.8264,
+      "step": 3325
+    },
+    {
+      "epoch": 1.1887863733144073,
+      "grad_norm": 5.216485977172852,
+      "learning_rate": 3.354363827549948e-05,
+      "loss": 0.8398,
+      "step": 3350
+    },
+    {
+      "epoch": 1.1976579134137686,
+      "grad_norm": 5.230435371398926,
+      "learning_rate": 3.3379337539432175e-05,
+      "loss": 0.9218,
+      "step": 3375
+    },
+    {
+      "epoch": 1.20652945351313,
+      "grad_norm": 6.927298069000244,
+      "learning_rate": 3.321503680336488e-05,
+      "loss": 0.8361,
+      "step": 3400
+    },
+    {
+      "epoch": 1.2154009936124912,
+      "grad_norm": 8.193685531616211,
+      "learning_rate": 3.3050736067297585e-05,
+      "loss": 0.8125,
+      "step": 3425
+    },
+    {
+      "epoch": 1.2242725337118523,
+      "grad_norm": 4.92389440536499,
+      "learning_rate": 3.288643533123029e-05,
+      "loss": 0.9335,
+      "step": 3450
+    },
+    {
+      "epoch": 1.2331440738112136,
+      "grad_norm": 7.39652156829834,
+      "learning_rate": 3.272213459516299e-05,
+      "loss": 0.8435,
+      "step": 3475
+    },
+    {
+      "epoch": 1.242015613910575,
+      "grad_norm": 5.768881320953369,
+      "learning_rate": 3.2557833859095685e-05,
+      "loss": 0.8422,
+      "step": 3500
+    },
+    {
+      "epoch": 1.250887154009936,
+      "grad_norm": 6.411141872406006,
+      "learning_rate": 3.239353312302839e-05,
+      "loss": 0.8596,
+      "step": 3525
+    },
+    {
+      "epoch": 1.2597586941092973,
+      "grad_norm": 2.415675163269043,
+      "learning_rate": 3.2229232386961094e-05,
+      "loss": 0.761,
+      "step": 3550
+    },
+    {
+      "epoch": 1.2686302342086586,
+      "grad_norm": 4.490350246429443,
+      "learning_rate": 3.20649316508938e-05,
+      "loss": 0.8352,
+      "step": 3575
+    },
+    {
+      "epoch": 1.27750177430802,
+      "grad_norm": 6.4845871925354,
+      "learning_rate": 3.19006309148265e-05,
+      "loss": 0.9389,
+      "step": 3600
+    },
+    {
+      "epoch": 1.2863733144073812,
+      "grad_norm": 9.419644355773926,
+      "learning_rate": 3.173633017875921e-05,
+      "loss": 0.8249,
+      "step": 3625
+    },
+    {
+      "epoch": 1.2952448545067423,
+      "grad_norm": 4.427757740020752,
+      "learning_rate": 3.1572029442691905e-05,
+      "loss": 0.8947,
+      "step": 3650
+    },
+    {
+      "epoch": 1.3041163946061036,
+      "grad_norm": 5.791045665740967,
+      "learning_rate": 3.140772870662461e-05,
+      "loss": 0.9288,
+      "step": 3675
+    },
+    {
+      "epoch": 1.312987934705465,
+      "grad_norm": 6.885122299194336,
+      "learning_rate": 3.124342797055731e-05,
+      "loss": 0.8869,
+      "step": 3700
+    },
+    {
+      "epoch": 1.321859474804826,
+      "grad_norm": 4.827605247497559,
+      "learning_rate": 3.107912723449001e-05,
+      "loss": 0.8798,
+      "step": 3725
+    },
+    {
+      "epoch": 1.3307310149041873,
+      "grad_norm": 5.087188243865967,
+      "learning_rate": 3.0914826498422716e-05,
+      "loss": 0.8865,
+      "step": 3750
+    },
+    {
+      "epoch": 1.3396025550035486,
+      "grad_norm": 6.508386135101318,
+      "learning_rate": 3.0750525762355414e-05,
+      "loss": 0.769,
+      "step": 3775
+    },
+    {
+      "epoch": 1.34847409510291,
+      "grad_norm": 3.5530879497528076,
+      "learning_rate": 3.058622502628812e-05,
+      "loss": 0.8959,
+      "step": 3800
+    },
+    {
+      "epoch": 1.3573456352022713,
+      "grad_norm": 5.191397666931152,
+      "learning_rate": 3.042192429022082e-05,
+      "loss": 0.9568,
+      "step": 3825
+    },
+    {
+      "epoch": 1.3662171753016323,
+      "grad_norm": 6.676424980163574,
+      "learning_rate": 3.0257623554153524e-05,
+      "loss": 0.9181,
+      "step": 3850
+    },
+    {
+      "epoch": 1.3750887154009936,
+      "grad_norm": 3.397516965866089,
+      "learning_rate": 3.009332281808623e-05,
+      "loss": 0.7627,
+      "step": 3875
+    },
+    {
+      "epoch": 1.3839602555003547,
+      "grad_norm": 6.955685615539551,
+      "learning_rate": 2.9929022082018933e-05,
+      "loss": 0.8065,
+      "step": 3900
+    },
+    {
+      "epoch": 1.392831795599716,
+      "grad_norm": 4.397638320922852,
+      "learning_rate": 2.976472134595163e-05,
+      "loss": 0.9026,
+      "step": 3925
+    },
+    {
+      "epoch": 1.4017033356990773,
+      "grad_norm": 4.824989318847656,
+      "learning_rate": 2.9600420609884332e-05,
+      "loss": 0.8852,
+      "step": 3950
+    },
+    {
+      "epoch": 1.4105748757984387,
+      "grad_norm": 6.728146076202393,
+      "learning_rate": 2.9436119873817037e-05,
+      "loss": 0.8408,
+      "step": 3975
+    },
+    {
+      "epoch": 1.4194464158978,
+      "grad_norm": 16.979982376098633,
+      "learning_rate": 2.927181913774974e-05,
+      "loss": 0.9352,
+      "step": 4000
+    },
+    {
+      "epoch": 1.428317955997161,
+      "grad_norm": 6.757816791534424,
+      "learning_rate": 2.9107518401682442e-05,
+      "loss": 0.89,
+      "step": 4025
+    },
+    {
+      "epoch": 1.4371894960965224,
+      "grad_norm": 9.163891792297363,
+      "learning_rate": 2.894321766561514e-05,
+      "loss": 0.9316,
+      "step": 4050
+    },
+    {
+      "epoch": 1.4460610361958837,
+      "grad_norm": 9.104488372802734,
+      "learning_rate": 2.8778916929547845e-05,
+      "loss": 0.9046,
+      "step": 4075
+    },
+    {
+      "epoch": 1.4549325762952448,
+      "grad_norm": 6.510610580444336,
+      "learning_rate": 2.861461619348055e-05,
+      "loss": 0.7608,
+      "step": 4100
+    },
+    {
+      "epoch": 1.463804116394606,
+      "grad_norm": 6.328439235687256,
+      "learning_rate": 2.845031545741325e-05,
+      "loss": 1.0008,
+      "step": 4125
+    },
+    {
+      "epoch": 1.4726756564939674,
+      "grad_norm": 6.498648166656494,
+      "learning_rate": 2.8286014721345955e-05,
+      "loss": 0.918,
+      "step": 4150
+    },
+    {
+      "epoch": 1.4815471965933287,
+      "grad_norm": 6.506234169006348,
+      "learning_rate": 2.8121713985278653e-05,
+      "loss": 0.8831,
+      "step": 4175
+    },
+    {
+      "epoch": 1.49041873669269,
+      "grad_norm": 3.214972734451294,
+      "learning_rate": 2.7957413249211357e-05,
+      "loss": 0.8107,
+      "step": 4200
+    },
+    {
+      "epoch": 1.499290276792051,
+      "grad_norm": 4.053207874298096,
+      "learning_rate": 2.7793112513144058e-05,
+      "loss": 0.7369,
+      "step": 4225
+    },
+    {
+      "epoch": 1.5081618168914124,
+      "grad_norm": 7.692103385925293,
+      "learning_rate": 2.7628811777076763e-05,
+      "loss": 0.8413,
+      "step": 4250
+    },
+    {
+      "epoch": 1.5170333569907735,
+      "grad_norm": 7.168026447296143,
+      "learning_rate": 2.7464511041009467e-05,
+      "loss": 0.7727,
+      "step": 4275
+    },
+    {
+      "epoch": 1.5259048970901348,
+      "grad_norm": 37.76559066772461,
+      "learning_rate": 2.730021030494217e-05,
+      "loss": 0.896,
+      "step": 4300
+    },
+    {
+      "epoch": 1.534776437189496,
+      "grad_norm": 7.820963382720947,
+      "learning_rate": 2.7135909568874866e-05,
+      "loss": 0.9137,
+      "step": 4325
+    },
+    {
+      "epoch": 1.5436479772888574,
+      "grad_norm": 5.012437343597412,
+      "learning_rate": 2.697160883280757e-05,
+      "loss": 0.8227,
+      "step": 4350
+    },
+    {
+      "epoch": 1.5525195173882187,
+      "grad_norm": 4.973189353942871,
+      "learning_rate": 2.6807308096740275e-05,
+      "loss": 0.7805,
+      "step": 4375
+    },
+    {
+      "epoch": 1.56139105748758,
+      "grad_norm": 5.134377956390381,
+      "learning_rate": 2.664300736067298e-05,
+      "loss": 0.8911,
+      "step": 4400
+    },
+    {
+      "epoch": 1.570262597586941,
+      "grad_norm": 4.107070446014404,
+      "learning_rate": 2.647870662460568e-05,
+      "loss": 0.9355,
+      "step": 4425
+    },
+    {
+      "epoch": 1.5791341376863024,
+      "grad_norm": 4.052985191345215,
+      "learning_rate": 2.631440588853838e-05,
+      "loss": 0.9195,
+      "step": 4450
+    },
+    {
+      "epoch": 1.5880056777856635,
+      "grad_norm": 6.549808025360107,
+      "learning_rate": 2.6150105152471083e-05,
+      "loss": 0.8856,
+      "step": 4475
+    },
+    {
+      "epoch": 1.5968772178850248,
+      "grad_norm": 4.466334819793701,
+      "learning_rate": 2.5985804416403788e-05,
+      "loss": 0.9777,
+      "step": 4500
+    },
+    {
+      "epoch": 1.605748757984386,
+      "grad_norm": 4.111423015594482,
+      "learning_rate": 2.582150368033649e-05,
+      "loss": 0.8266,
+      "step": 4525
+    },
+    {
+      "epoch": 1.6146202980837474,
+      "grad_norm": 5.717258453369141,
+      "learning_rate": 2.5657202944269193e-05,
+      "loss": 0.773,
+      "step": 4550
+    },
+    {
+      "epoch": 1.6234918381831087,
+      "grad_norm": 5.022154331207275,
+      "learning_rate": 2.5492902208201898e-05,
+      "loss": 0.8747,
+      "step": 4575
+    },
+    {
+      "epoch": 1.6323633782824698,
+      "grad_norm": 3.223011016845703,
+      "learning_rate": 2.5328601472134596e-05,
+      "loss": 0.8246,
+      "step": 4600
+    },
+    {
+      "epoch": 1.641234918381831,
+      "grad_norm": 5.4209303855896,
+      "learning_rate": 2.5164300736067297e-05,
+      "loss": 0.8347,
+      "step": 4625
+    },
+    {
+      "epoch": 1.6501064584811922,
+      "grad_norm": 6.209916114807129,
+      "learning_rate": 2.5e-05,
+      "loss": 0.946,
+      "step": 4650
+    },
+    {
+      "epoch": 1.6589779985805535,
+      "grad_norm": 4.774755477905273,
+      "learning_rate": 2.4835699263932706e-05,
+      "loss": 0.8054,
+      "step": 4675
+    },
+    {
+      "epoch": 1.6678495386799148,
+      "grad_norm": 7.4865193367004395,
+      "learning_rate": 2.4671398527865407e-05,
+      "loss": 0.7584,
+      "step": 4700
+    },
+    {
+      "epoch": 1.6767210787792761,
+      "grad_norm": 6.235969543457031,
+      "learning_rate": 2.4507097791798108e-05,
+      "loss": 0.7856,
+      "step": 4725
+    },
+    {
+      "epoch": 1.6855926188786374,
+      "grad_norm": 5.785863399505615,
+      "learning_rate": 2.4342797055730813e-05,
+      "loss": 0.8978,
+      "step": 4750
+    },
+    {
+      "epoch": 1.6944641589779987,
+      "grad_norm": 4.585093021392822,
+      "learning_rate": 2.4178496319663514e-05,
+      "loss": 0.6833,
+      "step": 4775
+    },
+    {
+      "epoch": 1.7033356990773598,
+      "grad_norm": 5.399811744689941,
+      "learning_rate": 2.4014195583596215e-05,
+      "loss": 0.7674,
+      "step": 4800
+    },
+    {
+      "epoch": 1.7122072391767211,
+      "grad_norm": 5.478691101074219,
+      "learning_rate": 2.3849894847528916e-05,
+      "loss": 0.8765,
+      "step": 4825
+    },
+    {
+      "epoch": 1.7210787792760822,
+      "grad_norm": 5.945518970489502,
+      "learning_rate": 2.368559411146162e-05,
+      "loss": 0.8706,
+      "step": 4850
+    },
+    {
+      "epoch": 1.7299503193754435,
+      "grad_norm": 4.882993698120117,
+      "learning_rate": 2.3521293375394325e-05,
+      "loss": 0.7293,
+      "step": 4875
+    },
+    {
+      "epoch": 1.7388218594748048,
+      "grad_norm": 6.987505912780762,
+      "learning_rate": 2.3356992639327023e-05,
+      "loss": 0.7372,
+      "step": 4900
+    },
+    {
+      "epoch": 1.7476933995741661,
+      "grad_norm": 4.7848286628723145,
+      "learning_rate": 2.3192691903259727e-05,
+      "loss": 0.9114,
+      "step": 4925
+    },
+    {
+      "epoch": 1.7565649396735274,
+      "grad_norm": 6.290490627288818,
+      "learning_rate": 2.3028391167192432e-05,
+      "loss": 0.8896,
+      "step": 4950
+    },
+    {
+      "epoch": 1.7654364797728885,
+      "grad_norm": 6.204107284545898,
+      "learning_rate": 2.2864090431125133e-05,
+      "loss": 0.7874,
+      "step": 4975
+    },
+    {
+      "epoch": 1.7743080198722498,
+      "grad_norm": 5.911350250244141,
+      "learning_rate": 2.2699789695057834e-05,
+      "loss": 0.8717,
+      "step": 5000
+    },
+    {
+      "epoch": 1.783179559971611,
+      "grad_norm": 6.586339473724365,
+      "learning_rate": 2.2535488958990535e-05,
+      "loss": 0.924,
+      "step": 5025
+    },
+    {
+      "epoch": 1.7920511000709722,
+      "grad_norm": 7.093676567077637,
+      "learning_rate": 2.237118822292324e-05,
+      "loss": 0.8317,
+      "step": 5050
+    },
+    {
+      "epoch": 1.8009226401703335,
+      "grad_norm": 7.202934265136719,
+      "learning_rate": 2.2206887486855944e-05,
+      "loss": 0.7207,
+      "step": 5075
+    },
+    {
+      "epoch": 1.8097941802696949,
+      "grad_norm": 9.095272064208984,
+      "learning_rate": 2.2042586750788642e-05,
+      "loss": 0.7929,
+      "step": 5100
+    },
+    {
+      "epoch": 1.8186657203690562,
+      "grad_norm": 6.040229320526123,
+      "learning_rate": 2.1878286014721347e-05,
+      "loss": 0.9413,
+      "step": 5125
+    },
+    {
+      "epoch": 1.8275372604684175,
+      "grad_norm": 4.86398983001709,
+      "learning_rate": 2.171398527865405e-05,
+      "loss": 0.8813,
+      "step": 5150
+    },
+    {
+      "epoch": 1.8364088005677786,
+      "grad_norm": 4.922494888305664,
+      "learning_rate": 2.1549684542586752e-05,
+      "loss": 0.8695,
+      "step": 5175
+    },
+    {
+      "epoch": 1.8452803406671399,
+      "grad_norm": 6.124248504638672,
+      "learning_rate": 2.1385383806519453e-05,
+      "loss": 0.7623,
+      "step": 5200
+    },
+    {
+      "epoch": 1.854151880766501,
+      "grad_norm": 5.949371814727783,
+      "learning_rate": 2.1221083070452158e-05,
+      "loss": 0.8198,
+      "step": 5225
+    },
+    {
+      "epoch": 1.8630234208658623,
+      "grad_norm": 5.420801162719727,
+      "learning_rate": 2.105678233438486e-05,
+      "loss": 0.8091,
+      "step": 5250
+    },
+    {
+      "epoch": 1.8718949609652236,
+      "grad_norm": 4.861514091491699,
+      "learning_rate": 2.0892481598317563e-05,
+      "loss": 0.7612,
+      "step": 5275
+    },
+    {
+      "epoch": 1.8807665010645849,
+      "grad_norm": 7.055742263793945,
+      "learning_rate": 2.072818086225026e-05,
+      "loss": 0.9407,
+      "step": 5300
+    },
+    {
+      "epoch": 1.8896380411639462,
+      "grad_norm": 5.109190940856934,
+      "learning_rate": 2.0563880126182966e-05,
+      "loss": 0.835,
+      "step": 5325
+    },
+    {
+      "epoch": 1.8985095812633073,
+      "grad_norm": 7.075034141540527,
+      "learning_rate": 2.039957939011567e-05,
+      "loss": 0.7638,
+      "step": 5350
+    },
+    {
+      "epoch": 1.9073811213626686,
+      "grad_norm": 10.149177551269531,
+      "learning_rate": 2.023527865404837e-05,
+      "loss": 0.7629,
+      "step": 5375
+    },
+    {
+      "epoch": 1.9162526614620297,
+      "grad_norm": 5.505281448364258,
+      "learning_rate": 2.0070977917981073e-05,
+      "loss": 0.8724,
+      "step": 5400
+    },
+    {
+      "epoch": 1.925124201561391,
+      "grad_norm": 4.993834495544434,
+      "learning_rate": 1.9906677181913777e-05,
+      "loss": 0.8732,
+      "step": 5425
+    },
+    {
+      "epoch": 1.9339957416607523,
+      "grad_norm": 8.533299446105957,
+      "learning_rate": 1.9742376445846478e-05,
+      "loss": 0.7291,
+      "step": 5450
+    },
+    {
+      "epoch": 1.9428672817601136,
+      "grad_norm": 5.450367450714111,
+      "learning_rate": 1.9578075709779183e-05,
+      "loss": 0.8014,
+      "step": 5475
+    },
+    {
+      "epoch": 1.951738821859475,
+      "grad_norm": 6.007541179656982,
+      "learning_rate": 1.941377497371188e-05,
+      "loss": 0.7869,
+      "step": 5500
+    },
+    {
+      "epoch": 1.9606103619588362,
+      "grad_norm": 4.755675792694092,
+      "learning_rate": 1.9249474237644585e-05,
+      "loss": 0.7357,
+      "step": 5525
+    },
+    {
+      "epoch": 1.9694819020581973,
+      "grad_norm": 6.058708667755127,
+      "learning_rate": 1.908517350157729e-05,
+      "loss": 0.8856,
+      "step": 5550
+    },
+    {
+      "epoch": 1.9783534421575586,
+      "grad_norm": 5.893248558044434,
+      "learning_rate": 1.892087276550999e-05,
+      "loss": 0.7709,
+      "step": 5575
+    },
+    {
+      "epoch": 1.9872249822569197,
+      "grad_norm": 6.941923141479492,
+      "learning_rate": 1.8756572029442692e-05,
+      "loss": 0.8291,
+      "step": 5600
+    },
+    {
+      "epoch": 1.996096522356281,
+      "grad_norm": 5.693519115447998,
+      "learning_rate": 1.8592271293375396e-05,
+      "loss": 0.7554,
+      "step": 5625
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.6713398402839397,
+      "eval_f1_macro": 0.6784994509782545,
+      "eval_f1_micro": 0.6713398402839397,
+      "eval_f1_weighted": 0.6752966842367127,
+      "eval_loss": 0.7940883040428162,
+      "eval_precision_macro": 0.6994216245852686,
+      "eval_precision_micro": 0.6713398402839397,
+      "eval_precision_weighted": 0.6949112315057265,
+      "eval_recall_macro": 0.6730557962224918,
+      "eval_recall_micro": 0.6713398402839397,
+      "eval_recall_weighted": 0.6713398402839397,
+      "eval_runtime": 1183.1474,
+      "eval_samples_per_second": 4.763,
+      "eval_steps_per_second": 0.298,
+      "step": 5636
+    },
+    {
+      "epoch": 2.0049680624556423,
+      "grad_norm": 6.007699489593506,
+      "learning_rate": 1.8427970557308097e-05,
+      "loss": 0.7755,
+      "step": 5650
+    },
+    {
+      "epoch": 2.0138396025550036,
+      "grad_norm": 6.981593132019043,
+      "learning_rate": 1.82636698212408e-05,
+      "loss": 0.8676,
+      "step": 5675
+    },
+    {
+      "epoch": 2.022711142654365,
+      "grad_norm": 4.4236040115356445,
+      "learning_rate": 1.8099369085173503e-05,
+      "loss": 0.7124,
+      "step": 5700
+    },
+    {
+      "epoch": 2.031582682753726,
+      "grad_norm": 6.685351371765137,
+      "learning_rate": 1.7935068349106204e-05,
+      "loss": 0.864,
+      "step": 5725
+    },
+    {
+      "epoch": 2.040454222853087,
+      "grad_norm": 6.873079776763916,
+      "learning_rate": 1.777076761303891e-05,
+      "loss": 0.8414,
+      "step": 5750
+    },
+    {
+      "epoch": 2.0493257629524484,
+      "grad_norm": 7.3745317459106445,
+      "learning_rate": 1.760646687697161e-05,
+      "loss": 0.7731,
+      "step": 5775
+    },
+    {
+      "epoch": 2.0581973030518097,
+      "grad_norm": 4.843000411987305,
+      "learning_rate": 1.744216614090431e-05,
+      "loss": 0.6793,
+      "step": 5800
+    },
+    {
+      "epoch": 2.067068843151171,
+      "grad_norm": 8.414238929748535,
+      "learning_rate": 1.7277865404837016e-05,
+      "loss": 0.808,
+      "step": 5825
+    },
+    {
+      "epoch": 2.0759403832505323,
+      "grad_norm": 4.896043300628662,
+      "learning_rate": 1.7113564668769717e-05,
+      "loss": 0.7711,
+      "step": 5850
+    },
+    {
+      "epoch": 2.0848119233498936,
+      "grad_norm": 5.479866981506348,
+      "learning_rate": 1.6949263932702418e-05,
+      "loss": 0.8178,
+      "step": 5875
+    },
+    {
+      "epoch": 2.093683463449255,
+      "grad_norm": 5.603784561157227,
+      "learning_rate": 1.6784963196635122e-05,
+      "loss": 0.647,
+      "step": 5900
+    },
+    {
+      "epoch": 2.1025550035486162,
+      "grad_norm": 4.867652893066406,
+      "learning_rate": 1.6620662460567824e-05,
+      "loss": 0.795,
+      "step": 5925
+    },
+    {
+      "epoch": 2.111426543647977,
+      "grad_norm": 5.600367069244385,
+      "learning_rate": 1.6456361724500528e-05,
+      "loss": 0.8532,
+      "step": 5950
+    },
+    {
+      "epoch": 2.1202980837473384,
+      "grad_norm": 7.279195308685303,
+      "learning_rate": 1.629206098843323e-05,
+      "loss": 0.6361,
+      "step": 5975
+    },
+    {
+      "epoch": 2.1291696238466997,
+      "grad_norm": 11.310209274291992,
+      "learning_rate": 1.612776025236593e-05,
+      "loss": 0.7322,
+      "step": 6000
+    },
+    {
+      "epoch": 2.138041163946061,
+      "grad_norm": 8.624905586242676,
+      "learning_rate": 1.5963459516298635e-05,
+      "loss": 0.8561,
+      "step": 6025
+    },
+    {
+      "epoch": 2.1469127040454223,
+      "grad_norm": 7.717365741729736,
+      "learning_rate": 1.5799158780231336e-05,
+      "loss": 0.7117,
+      "step": 6050
+    },
+    {
+      "epoch": 2.1557842441447836,
+      "grad_norm": 5.242558479309082,
+      "learning_rate": 1.5634858044164037e-05,
+      "loss": 0.8348,
+      "step": 6075
+    },
+    {
+      "epoch": 2.164655784244145,
+      "grad_norm": 8.414390563964844,
+      "learning_rate": 1.547055730809674e-05,
+      "loss": 0.8712,
+      "step": 6100
+    },
+    {
+      "epoch": 2.173527324343506,
+      "grad_norm": 4.2956461906433105,
+      "learning_rate": 1.5306256572029443e-05,
+      "loss": 0.8593,
+      "step": 6125
+    },
+    {
+      "epoch": 2.182398864442867,
+      "grad_norm": 5.302322864532471,
+      "learning_rate": 1.5141955835962146e-05,
+      "loss": 0.8543,
+      "step": 6150
+    },
+    {
+      "epoch": 2.1912704045422284,
+      "grad_norm": 8.463561058044434,
+      "learning_rate": 1.497765509989485e-05,
+      "loss": 0.7953,
+      "step": 6175
+    },
+    {
+      "epoch": 2.2001419446415897,
+      "grad_norm": 7.6317853927612305,
+      "learning_rate": 1.481335436382755e-05,
+      "loss": 0.8429,
+      "step": 6200
+    },
+    {
+      "epoch": 2.209013484740951,
+      "grad_norm": 8.177587509155273,
+      "learning_rate": 1.4649053627760254e-05,
+      "loss": 0.8472,
+      "step": 6225
+    },
+    {
+      "epoch": 2.2178850248403124,
+      "grad_norm": 9.943741798400879,
+      "learning_rate": 1.4484752891692954e-05,
+      "loss": 0.7034,
+      "step": 6250
+    },
+    {
+      "epoch": 2.2267565649396737,
+      "grad_norm": 5.518933296203613,
+      "learning_rate": 1.4320452155625658e-05,
+      "loss": 0.8279,
+      "step": 6275
+    },
+    {
+      "epoch": 2.235628105039035,
+      "grad_norm": 5.986506462097168,
+      "learning_rate": 1.4156151419558361e-05,
+      "loss": 0.7338,
+      "step": 6300
+    },
+    {
+      "epoch": 2.244499645138396,
+      "grad_norm": 6.913864612579346,
+      "learning_rate": 1.3991850683491062e-05,
+      "loss": 0.75,
+      "step": 6325
+    },
+    {
+      "epoch": 2.253371185237757,
+      "grad_norm": 14.090570449829102,
+      "learning_rate": 1.3827549947423765e-05,
+      "loss": 0.8477,
+      "step": 6350
+    },
+    {
+      "epoch": 2.2622427253371185,
+      "grad_norm": 6.265283584594727,
+      "learning_rate": 1.366324921135647e-05,
+      "loss": 0.8558,
+      "step": 6375
+    },
+    {
+      "epoch": 2.2711142654364798,
+      "grad_norm": 5.962440013885498,
+      "learning_rate": 1.3498948475289169e-05,
+      "loss": 0.7053,
+      "step": 6400
+    },
+    {
+      "epoch": 2.279985805535841,
+      "grad_norm": 6.866328239440918,
+      "learning_rate": 1.3334647739221873e-05,
+      "loss": 0.7223,
+      "step": 6425
+    },
+    {
+      "epoch": 2.2888573456352024,
+      "grad_norm": 9.418810844421387,
+      "learning_rate": 1.3170347003154576e-05,
+      "loss": 0.8068,
+      "step": 6450
+    },
+    {
+      "epoch": 2.2977288857345637,
+      "grad_norm": 10.400421142578125,
+      "learning_rate": 1.3006046267087277e-05,
+      "loss": 0.7997,
+      "step": 6475
+    },
+    {
+      "epoch": 2.306600425833925,
+      "grad_norm": 6.021897792816162,
+      "learning_rate": 1.284174553101998e-05,
+      "loss": 0.7601,
+      "step": 6500
+    },
+    {
+      "epoch": 2.315471965933286,
+      "grad_norm": 4.0054521560668945,
+      "learning_rate": 1.2677444794952681e-05,
+      "loss": 0.8377,
+      "step": 6525
+    },
+    {
+      "epoch": 2.324343506032647,
+      "grad_norm": 4.744009971618652,
+      "learning_rate": 1.2513144058885384e-05,
+      "loss": 0.8302,
+      "step": 6550
+    },
+    {
+      "epoch": 2.3332150461320085,
+      "grad_norm": 12.244515419006348,
+      "learning_rate": 1.2348843322818087e-05,
+      "loss": 0.797,
+      "step": 6575
+    },
+    {
+      "epoch": 2.34208658623137,
+      "grad_norm": 5.934150218963623,
+      "learning_rate": 1.218454258675079e-05,
+      "loss": 0.7326,
+      "step": 6600
+    },
+    {
+      "epoch": 2.350958126330731,
+      "grad_norm": 7.615633010864258,
+      "learning_rate": 1.2020241850683493e-05,
+      "loss": 0.6852,
+      "step": 6625
+    },
+    {
+      "epoch": 2.3598296664300924,
+      "grad_norm": 2.2285540103912354,
+      "learning_rate": 1.1855941114616194e-05,
+      "loss": 0.6879,
+      "step": 6650
+    },
+    {
+      "epoch": 2.3687012065294537,
+      "grad_norm": 9.67764949798584,
+      "learning_rate": 1.1691640378548897e-05,
+      "loss": 0.7886,
+      "step": 6675
+    },
+    {
+      "epoch": 2.3775727466288146,
+      "grad_norm": 12.79229736328125,
+      "learning_rate": 1.15273396424816e-05,
+      "loss": 0.8171,
+      "step": 6700
+    },
+    {
+      "epoch": 2.386444286728176,
+      "grad_norm": 8.917281150817871,
+      "learning_rate": 1.1363038906414302e-05,
+      "loss": 0.7607,
+      "step": 6725
+    },
+    {
+      "epoch": 2.395315826827537,
+      "grad_norm": 6.359137535095215,
+      "learning_rate": 1.1198738170347003e-05,
+      "loss": 0.7509,
+      "step": 6750
+    },
+    {
+      "epoch": 2.4041873669268985,
+      "grad_norm": 5.579113960266113,
+      "learning_rate": 1.1034437434279706e-05,
+      "loss": 0.7344,
+      "step": 6775
+    },
+    {
+      "epoch": 2.41305890702626,
+      "grad_norm": 5.791996002197266,
+      "learning_rate": 1.0870136698212409e-05,
+      "loss": 0.7222,
+      "step": 6800
+    },
+    {
+      "epoch": 2.421930447125621,
+      "grad_norm": 4.481837272644043,
+      "learning_rate": 1.070583596214511e-05,
+      "loss": 0.6455,
+      "step": 6825
+    },
+    {
+      "epoch": 2.4308019872249824,
+      "grad_norm": 5.7892069816589355,
+      "learning_rate": 1.0541535226077813e-05,
+      "loss": 0.6221,
+      "step": 6850
+    },
+    {
+      "epoch": 2.4396735273243433,
+      "grad_norm": 4.64011287689209,
+      "learning_rate": 1.0377234490010516e-05,
+      "loss": 0.6901,
+      "step": 6875
+    },
+    {
+      "epoch": 2.4485450674237046,
+      "grad_norm": 12.223133087158203,
+      "learning_rate": 1.0212933753943219e-05,
+      "loss": 0.7315,
+      "step": 6900
+    },
+    {
+      "epoch": 2.457416607523066,
+      "grad_norm": 7.667706489562988,
+      "learning_rate": 1.004863301787592e-05,
+      "loss": 0.7421,
+      "step": 6925
+    },
+    {
+      "epoch": 2.466288147622427,
+      "grad_norm": 6.26972770690918,
+      "learning_rate": 9.884332281808623e-06,
+      "loss": 0.8319,
+      "step": 6950
+    },
+    {
+      "epoch": 2.4751596877217885,
+      "grad_norm": 4.876287937164307,
+      "learning_rate": 9.720031545741325e-06,
+      "loss": 0.7305,
+      "step": 6975
+    },
+    {
+      "epoch": 2.48403122782115,
+      "grad_norm": 8.227882385253906,
+      "learning_rate": 9.555730809674028e-06,
+      "loss": 0.8635,
+      "step": 7000
+    },
+    {
+      "epoch": 2.492902767920511,
+      "grad_norm": 4.739542007446289,
+      "learning_rate": 9.39143007360673e-06,
+      "loss": 0.7291,
+      "step": 7025
+    },
+    {
+      "epoch": 2.501774308019872,
+      "grad_norm": 1.6183698177337646,
+      "learning_rate": 9.227129337539432e-06,
+      "loss": 0.7295,
+      "step": 7050
+    },
+    {
+      "epoch": 2.5106458481192337,
+      "grad_norm": 8.615501403808594,
+      "learning_rate": 9.062828601472135e-06,
+      "loss": 0.7208,
+      "step": 7075
+    },
+    {
+      "epoch": 2.5195173882185946,
+      "grad_norm": 6.233531951904297,
+      "learning_rate": 8.898527865404838e-06,
+      "loss": 0.7005,
+      "step": 7100
+    },
+    {
+      "epoch": 2.528388928317956,
+      "grad_norm": 5.194847106933594,
+      "learning_rate": 8.734227129337539e-06,
+      "loss": 0.8115,
+      "step": 7125
+    },
+    {
+      "epoch": 2.5372604684173172,
+      "grad_norm": 5.162277698516846,
+      "learning_rate": 8.569926393270242e-06,
+      "loss": 0.6818,
+      "step": 7150
+    },
+    {
+      "epoch": 2.5461320085166785,
+      "grad_norm": 10.57759952545166,
+      "learning_rate": 8.405625657202945e-06,
+      "loss": 0.7705,
+      "step": 7175
+    },
+    {
+      "epoch": 2.55500354861604,
+      "grad_norm": 6.097165584564209,
+      "learning_rate": 8.241324921135648e-06,
+      "loss": 0.7589,
+      "step": 7200
+    },
+    {
+      "epoch": 2.563875088715401,
+      "grad_norm": 2.8841516971588135,
+      "learning_rate": 8.077024185068349e-06,
+      "loss": 0.7758,
+      "step": 7225
+    },
+    {
+      "epoch": 2.5727466288147625,
+      "grad_norm": 8.131023406982422,
+      "learning_rate": 7.912723449001051e-06,
+      "loss": 0.7349,
+      "step": 7250
+    },
+    {
+      "epoch": 2.5816181689141233,
+      "grad_norm": 9.21007251739502,
+      "learning_rate": 7.748422712933754e-06,
+      "loss": 0.7964,
+      "step": 7275
+    },
+    {
+      "epoch": 2.5904897090134846,
+      "grad_norm": 4.194328784942627,
+      "learning_rate": 7.584121976866457e-06,
+      "loss": 0.8019,
+      "step": 7300
+    },
+    {
+      "epoch": 2.599361249112846,
+      "grad_norm": 7.350373268127441,
+      "learning_rate": 7.419821240799159e-06,
+      "loss": 0.7725,
+      "step": 7325
+    },
+    {
+      "epoch": 2.6082327892122072,
+      "grad_norm": 9.057943344116211,
+      "learning_rate": 7.255520504731862e-06,
+      "loss": 0.8212,
+      "step": 7350
+    },
+    {
+      "epoch": 2.6171043293115686,
+      "grad_norm": 4.8207478523254395,
+      "learning_rate": 7.091219768664564e-06,
+      "loss": 0.7288,
+      "step": 7375
+    },
+    {
+      "epoch": 2.62597586941093,
+      "grad_norm": 5.385066032409668,
+      "learning_rate": 6.926919032597266e-06,
+      "loss": 0.7013,
+      "step": 7400
+    },
+    {
+      "epoch": 2.634847409510291,
+      "grad_norm": 7.983443737030029,
+      "learning_rate": 6.762618296529969e-06,
+      "loss": 0.6656,
+      "step": 7425
+    },
+    {
+      "epoch": 2.643718949609652,
+      "grad_norm": 9.354117393493652,
+      "learning_rate": 6.598317560462672e-06,
+      "loss": 0.7363,
+      "step": 7450
+    },
+    {
+      "epoch": 2.6525904897090133,
+      "grad_norm": 6.475661754608154,
+      "learning_rate": 6.4340168243953736e-06,
+      "loss": 0.7651,
+      "step": 7475
+    },
+    {
+      "epoch": 2.6614620298083747,
+      "grad_norm": 5.2187676429748535,
+      "learning_rate": 6.2697160883280756e-06,
+      "loss": 0.7259,
+      "step": 7500
+    },
+    {
+      "epoch": 2.670333569907736,
+      "grad_norm": 10.267646789550781,
+      "learning_rate": 6.105415352260778e-06,
+      "loss": 0.7818,
+      "step": 7525
+    },
+    {
+      "epoch": 2.6792051100070973,
+      "grad_norm": 8.05725383758545,
+      "learning_rate": 5.94111461619348e-06,
+      "loss": 0.7006,
+      "step": 7550
+    },
+    {
+      "epoch": 2.6880766501064586,
+      "grad_norm": 6.517486095428467,
+      "learning_rate": 5.776813880126183e-06,
+      "loss": 0.7342,
+      "step": 7575
+    },
+    {
+      "epoch": 2.69694819020582,
+      "grad_norm": 5.03231954574585,
+      "learning_rate": 5.612513144058885e-06,
+      "loss": 0.7959,
+      "step": 7600
+    },
+    {
+      "epoch": 2.7058197303051807,
+      "grad_norm": 3.084221124649048,
+      "learning_rate": 5.448212407991588e-06,
+      "loss": 0.7391,
+      "step": 7625
+    },
+    {
+      "epoch": 2.7146912704045425,
+      "grad_norm": 6.011415004730225,
+      "learning_rate": 5.28391167192429e-06,
+      "loss": 0.7073,
+      "step": 7650
+    },
+    {
+      "epoch": 2.7235628105039034,
+      "grad_norm": 1.8170585632324219,
+      "learning_rate": 5.119610935856993e-06,
+      "loss": 0.6981,
+      "step": 7675
+    },
+    {
+      "epoch": 2.7324343506032647,
+      "grad_norm": 8.507579803466797,
+      "learning_rate": 4.955310199789695e-06,
+      "loss": 0.8989,
+      "step": 7700
+    },
+    {
+      "epoch": 2.741305890702626,
+      "grad_norm": 4.660305976867676,
+      "learning_rate": 4.791009463722398e-06,
+      "loss": 0.7881,
+      "step": 7725
+    },
+    {
+      "epoch": 2.7501774308019873,
+      "grad_norm": 5.1059088706970215,
+      "learning_rate": 4.6267087276551e-06,
+      "loss": 0.6829,
+      "step": 7750
+    },
+    {
+      "epoch": 2.7590489709013486,
+      "grad_norm": 6.890865802764893,
+      "learning_rate": 4.4624079915878024e-06,
+      "loss": 0.7441,
+      "step": 7775
+    },
+    {
+      "epoch": 2.7679205110007095,
+      "grad_norm": 6.044835090637207,
+      "learning_rate": 4.298107255520505e-06,
+      "loss": 0.7416,
+      "step": 7800
+    },
+    {
+      "epoch": 2.776792051100071,
+      "grad_norm": 9.076371192932129,
+      "learning_rate": 4.133806519453207e-06,
+      "loss": 0.7572,
+      "step": 7825
+    },
+    {
+      "epoch": 2.785663591199432,
+      "grad_norm": 8.244555473327637,
+      "learning_rate": 3.96950578338591e-06,
+      "loss": 0.7547,
+      "step": 7850
+    },
+    {
+      "epoch": 2.7945351312987934,
+      "grad_norm": 10.965091705322266,
+      "learning_rate": 3.805205047318612e-06,
+      "loss": 0.7462,
+      "step": 7875
+    },
+    {
+      "epoch": 2.8034066713981547,
+      "grad_norm": 10.940464973449707,
+      "learning_rate": 3.640904311251315e-06,
+      "loss": 0.8307,
+      "step": 7900
+    },
+    {
+      "epoch": 2.812278211497516,
+      "grad_norm": 6.724726676940918,
+      "learning_rate": 3.476603575184017e-06,
+      "loss": 0.7771,
+      "step": 7925
+    },
+    {
+      "epoch": 2.8211497515968773,
+      "grad_norm": 8.394052505493164,
+      "learning_rate": 3.3123028391167197e-06,
+      "loss": 0.7948,
+      "step": 7950
+    },
+    {
+      "epoch": 2.8300212916962386,
+      "grad_norm": 3.6263391971588135,
+      "learning_rate": 3.1480021030494217e-06,
+      "loss": 0.6567,
+      "step": 7975
+    },
+    {
+      "epoch": 2.8388928317956,
+      "grad_norm": 7.270437717437744,
+      "learning_rate": 2.983701366982124e-06,
+      "loss": 0.7087,
+      "step": 8000
+    },
+    {
+      "epoch": 2.847764371894961,
+      "grad_norm": 6.618618965148926,
+      "learning_rate": 2.819400630914827e-06,
+      "loss": 0.6796,
+      "step": 8025
+    },
+    {
+      "epoch": 2.856635911994322,
+      "grad_norm": 7.489841938018799,
+      "learning_rate": 2.6550998948475293e-06,
+      "loss": 0.8368,
+      "step": 8050
+    },
+    {
+      "epoch": 2.8655074520936834,
+      "grad_norm": 3.919420003890991,
+      "learning_rate": 2.4907991587802318e-06,
+      "loss": 0.7596,
+      "step": 8075
+    },
+    {
+      "epoch": 2.8743789921930447,
+      "grad_norm": 7.457404136657715,
+      "learning_rate": 2.326498422712934e-06,
+      "loss": 0.7399,
+      "step": 8100
+    },
+    {
+      "epoch": 2.883250532292406,
+      "grad_norm": 11.167478561401367,
+      "learning_rate": 2.1621976866456366e-06,
+      "loss": 0.8489,
+      "step": 8125
+    },
+    {
+      "epoch": 2.8921220723917673,
+      "grad_norm": 11.763769149780273,
+      "learning_rate": 1.9978969505783385e-06,
+      "loss": 0.6573,
+      "step": 8150
+    },
+    {
+      "epoch": 2.9009936124911286,
+      "grad_norm": 5.394194602966309,
+      "learning_rate": 1.8335962145110412e-06,
+      "loss": 0.6636,
+      "step": 8175
+    },
+    {
+      "epoch": 2.9098651525904895,
+      "grad_norm": 7.921031475067139,
+      "learning_rate": 1.6692954784437436e-06,
+      "loss": 0.7437,
+      "step": 8200
+    },
+    {
+      "epoch": 2.918736692689851,
+      "grad_norm": 5.872561454772949,
+      "learning_rate": 1.504994742376446e-06,
+      "loss": 0.7926,
+      "step": 8225
+    },
+    {
+      "epoch": 2.927608232789212,
+      "grad_norm": 4.337924480438232,
+      "learning_rate": 1.3406940063091484e-06,
+      "loss": 0.7415,
+      "step": 8250
+    },
+    {
+      "epoch": 2.9364797728885734,
+      "grad_norm": 6.103896617889404,
+      "learning_rate": 1.1763932702418508e-06,
+      "loss": 0.7814,
+      "step": 8275
+    },
+    {
+      "epoch": 2.9453513129879347,
+      "grad_norm": 3.75592041015625,
+      "learning_rate": 1.0120925341745532e-06,
+      "loss": 0.7166,
+      "step": 8300
+    },
+    {
+      "epoch": 2.954222853087296,
+      "grad_norm": 4.261561870574951,
+      "learning_rate": 8.477917981072555e-07,
+      "loss": 0.735,
+      "step": 8325
+    },
+    {
+      "epoch": 2.9630943931866573,
+      "grad_norm": 6.112985134124756,
+      "learning_rate": 6.83491062039958e-07,
+      "loss": 0.7205,
+      "step": 8350
+    },
+    {
+      "epoch": 2.971965933286018,
+      "grad_norm": 4.848855018615723,
+      "learning_rate": 5.191903259726603e-07,
+      "loss": 0.7653,
+      "step": 8375
+    },
+    {
+      "epoch": 2.98083747338538,
+      "grad_norm": 7.813061714172363,
+      "learning_rate": 3.548895899053628e-07,
+      "loss": 0.7253,
+      "step": 8400
+    },
+    {
+      "epoch": 2.989709013484741,
+      "grad_norm": 6.210537433624268,
+      "learning_rate": 1.905888538380652e-07,
+      "loss": 0.8043,
+      "step": 8425
+    },
+    {
+      "epoch": 2.998580553584102,
+      "grad_norm": 4.216333389282227,
+      "learning_rate": 2.6288117770767614e-08,
+      "loss": 0.7481,
+      "step": 8450
+    },
+    {
+      "epoch": 3.0,
+      "eval_accuracy": 0.6885536823425022,
+      "eval_f1_macro": 0.6934027713968594,
+      "eval_f1_micro": 0.6885536823425022,
+      "eval_f1_weighted": 0.6903326985699842,
+      "eval_loss": 0.7668559551239014,
+      "eval_precision_macro": 0.7104396368026238,
+      "eval_precision_micro": 0.6885536823425022,
+      "eval_precision_weighted": 0.705612321743312,
+      "eval_recall_macro": 0.6898371166175705,
+      "eval_recall_micro": 0.6885536823425022,
+      "eval_recall_weighted": 0.6885536823425022,
+      "eval_runtime": 1239.6725,
+      "eval_samples_per_second": 4.546,
+      "eval_steps_per_second": 0.285,
+      "step": 8454
+    }
+  ],
+  "logging_steps": 25,
+  "max_steps": 8454,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "EarlyStoppingCallback": {
+      "args": {
+        "early_stopping_patience": 5,
+        "early_stopping_threshold": 0.01
+      },
+      "attributes": {
+        "early_stopping_patience_counter": 0
+      }
+    },
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 4447420182533376.0,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-8454/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:53979d3a7c4167b7fc8acf3d54bf0328b9ab43910addc7c9b0949c14f05c4483
+size 5240

config.json ADDED Viewed

	@@ -0,0 +1,48 @@

+{
+  "_name_or_path": "neuralmind/bert-base-portuguese-cased",
+  "_num_labels": 5,
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "directionality": "bidi",
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "1 - INTIMA\u00c7\u00c3O INICIAL",
+    "1": "2 - INTIMA\u00c7\u00c3O INSTRU\u00c7\u00c3O",
+    "2": "3 - INTIMA\u00c7\u00c3O RECURSAL",
+    "3": "4 - INTIMA\u00c7\u00c3O CUMPRIMENTO DE SENTEN\u00c7A",
+    "4": "5 - INTIMA\u00c7\u00c3O TR\u00c2NSITO"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "1 - INTIMA\u00c7\u00c3O INICIAL": 0,
+    "2 - INTIMA\u00c7\u00c3O INSTRU\u00c7\u00c3O": 1,
+    "3 - INTIMA\u00c7\u00c3O RECURSAL": 2,
+    "4 - INTIMA\u00c7\u00c3O CUMPRIMENTO DE SENTEN\u00c7A": 3,
+    "5 - INTIMA\u00c7\u00c3O TR\u00c2NSITO": 4
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "output_past": true,
+  "pad_token_id": 0,
+  "pooler_fc_size": 768,
+  "pooler_num_attention_heads": 12,
+  "pooler_num_fc_layers": 3,
+  "pooler_size_per_head": 128,
+  "pooler_type": "first_token_transform",
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.45.0",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 29794
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b7b5aea858f25e24827711cfbc02c35b160928b6c19e405df1c24d90f5be4a7d
+size 435731452

runs/Oct30_19-52-50_r-ulisesbravo-treino-teste-boqirknt-9f37e-oreva/events.out.tfevents.1730317972.r-ulisesbravo-treino-teste-boqirknt-9f37e-oreva.112.0 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ccc3d5c1239652a1b8d9f6b4b415bea9d4a446283a4def7360725cfe8523408f
-size 78492

 version https://git-lfs.github.com/spec/v1
+oid sha256:d7d32a5bd25a1376d5e195451aebc68b8991c1afd7eb19cb027903b565e887c0
+size 79890

runs/Oct30_19-52-50_r-ulisesbravo-treino-teste-boqirknt-9f37e-oreva/events.out.tfevents.1730377445.r-ulisesbravo-treino-teste-boqirknt-9f37e-oreva.112.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e4db22b0bcb567a589fb70930710cc3490d846471da2098519c2d172761ec4aa
+size 921

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": false,
+  "mask_token": "[MASK]",
+  "model_max_length": 1000000000000000019884624838656,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:53979d3a7c4167b7fc8acf3d54bf0328b9ab43910addc7c9b0949c14f05c4483
+size 5240

training_params.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+    "data_path": "autotrain-k9ag8-b7nm0/autotrain-data",
+    "model": "neuralmind/bert-base-portuguese-cased",
+    "lr": 5e-05,
+    "epochs": 3,
+    "max_seq_length": 128,
+    "batch_size": 8,
+    "warmup_ratio": 0.1,
+    "gradient_accumulation": 1,
+    "optimizer": "adamw_torch",
+    "scheduler": "linear",
+    "weight_decay": 0.0,
+    "max_grad_norm": 1.0,
+    "seed": 42,
+    "train_split": "train",
+    "valid_split": "validation",
+    "text_column": "autotrain_text",
+    "target_column": "autotrain_label",
+    "logging_steps": -1,
+    "project_name": "autotrain-k9ag8-b7nm0",
+    "auto_find_batch_size": false,
+    "mixed_precision": "fp16",
+    "save_total_limit": 1,
+    "push_to_hub": true,
+    "eval_strategy": "epoch",
+    "username": "ulisesbravo",
+    "log": "tensorboard",
+    "early_stopping_patience": 5,
+    "early_stopping_threshold": 0.01
+}

vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff