riken01 committed on Aug 26

Commit

7564718

•

1 Parent(s): 674f6c1

Upload folder using huggingface_hub

Browse files

Files changed (19) hide show

README.md +36 -0
checkpoint-6477/config.json +41 -0
checkpoint-6477/model.safetensors +3 -0
checkpoint-6477/optimizer.pt +3 -0
checkpoint-6477/rng_state.pth +3 -0
checkpoint-6477/scheduler.pt +3 -0
checkpoint-6477/trainer_state.json +1909 -0
checkpoint-6477/training_args.bin +3 -0
config.json +41 -0
merges.txt +0 -0
model.safetensors +3 -0
runs/Aug26_07-01-12_r-riken01-flan-t5-base-c75wamlq-d893c-y5aep/events.out.tfevents.1724655673.r-riken01-flan-t5-base-c75wamlq-d893c-y5aep.111.0 +2 -2
runs/Aug26_07-01-12_r-riken01-flan-t5-base-c75wamlq-d893c-y5aep/events.out.tfevents.1724656380.r-riken01-flan-t5-base-c75wamlq-d893c-y5aep.111.1 +3 -0
special_tokens_map.json +15 -0
tokenizer.json +0 -0
tokenizer_config.json +57 -0
training_args.bin +3 -0
training_params.json +30 -0
vocab.json +0 -0

README.md ADDED Viewed

	@@ -0,0 +1,36 @@

+---
+tags:
+- autotrain
+- text-classification
+base_model: FacebookAI/roberta-base
+widget:
+- text: "I love AutoTrain"
+---
+# Model Trained Using AutoTrain
+- Problem type: Text Classification
+## Validation Metrics
+loss: 1.2045246362686157
+f1_macro: 0.14211797171438428
+f1_micro: 0.39710843373493976
+f1_weighted: 0.22574498061234247
+precision_macro: 0.09927710843373494
+precision_micro: 0.39710843373493976
+precision_weighted: 0.15769510814341703
+recall_macro: 0.25
+recall_micro: 0.39710843373493976
+recall_weighted: 0.39710843373493976
+accuracy: 0.39710843373493976

checkpoint-6477/config.json ADDED Viewed

	@@ -0,0 +1,41 @@

+{
+  "_name_or_path": "FacebookAI/roberta-base",
+  "_num_labels": 4,
+  "architectures": [
+    "RobertaForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "bos_token_id": 0,
+  "classifier_dropout": null,
+  "eos_token_id": 2,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "gb",
+    "1": "gc",
+    "2": "gf",
+    "3": "u1"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "gb": 0,
+    "gc": 1,
+    "gf": 2,
+    "u1": 3
+  },
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 514,
+  "model_type": "roberta",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 1,
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.44.1",
+  "type_vocab_size": 1,
+  "use_cache": true,
+  "vocab_size": 50265
+}

checkpoint-6477/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:73a17fb75bfe03e6850ef726c0196e43a21f4930183cfc6d82d383c0ad389b13
+size 498618976

checkpoint-6477/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8d69bfd612fb760805383e21dd5994fbe3e53dff4b59a7c24736e4f5056824f6
+size 997357818

checkpoint-6477/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5d06602585737abe14a2a3dbbc8dfc0666b3b16b8640b7bf48d402f2643feeac
+size 14244

checkpoint-6477/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d64402b7afed544df7b457720d32a0494798414dbdec4dd716643517114c5d28
+size 1064

checkpoint-6477/trainer_state.json ADDED Viewed

	@@ -0,0 +1,1909 @@

+{
+  "best_metric": 1.2045246362686157,
+  "best_model_checkpoint": "TrustPilot-balanced-location-roberta/checkpoint-6477",
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 6477,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.01157943492357573,
+      "grad_norm": 4.521674633026123,
+      "learning_rate": 1.9290123456790124e-06,
+      "loss": 1.4326,
+      "step": 25
+    },
+    {
+      "epoch": 0.02315886984715146,
+      "grad_norm": 3.7206783294677734,
+      "learning_rate": 3.858024691358025e-06,
+      "loss": 1.3881,
+      "step": 50
+    },
+    {
+      "epoch": 0.03473830477072719,
+      "grad_norm": 5.430587291717529,
+      "learning_rate": 5.787037037037038e-06,
+      "loss": 1.3384,
+      "step": 75
+    },
+    {
+      "epoch": 0.04631773969430292,
+      "grad_norm": 5.371946334838867,
+      "learning_rate": 7.63888888888889e-06,
+      "loss": 1.2747,
+      "step": 100
+    },
+    {
+      "epoch": 0.05789717461787865,
+      "grad_norm": 7.290340423583984,
+      "learning_rate": 9.5679012345679e-06,
+      "loss": 1.1889,
+      "step": 125
+    },
+    {
+      "epoch": 0.06947660954145438,
+      "grad_norm": 7.079736232757568,
+      "learning_rate": 1.1496913580246914e-05,
+      "loss": 1.2006,
+      "step": 150
+    },
+    {
+      "epoch": 0.0810560444650301,
+      "grad_norm": 8.647546768188477,
+      "learning_rate": 1.3425925925925928e-05,
+      "loss": 1.1498,
+      "step": 175
+    },
+    {
+      "epoch": 0.09263547938860583,
+      "grad_norm": 12.410360336303711,
+      "learning_rate": 1.5354938271604938e-05,
+      "loss": 1.195,
+      "step": 200
+    },
+    {
+      "epoch": 0.10421491431218156,
+      "grad_norm": 6.746768951416016,
+      "learning_rate": 1.728395061728395e-05,
+      "loss": 1.1531,
+      "step": 225
+    },
+    {
+      "epoch": 0.1157943492357573,
+      "grad_norm": 4.067171573638916,
+      "learning_rate": 1.91358024691358e-05,
+      "loss": 1.2655,
+      "step": 250
+    },
+    {
+      "epoch": 0.12737378415933304,
+      "grad_norm": 8.56696891784668,
+      "learning_rate": 2.1064814814814816e-05,
+      "loss": 1.1576,
+      "step": 275
+    },
+    {
+      "epoch": 0.13895321908290875,
+      "grad_norm": 5.337764263153076,
+      "learning_rate": 2.2916666666666667e-05,
+      "loss": 1.1628,
+      "step": 300
+    },
+    {
+      "epoch": 0.1505326540064845,
+      "grad_norm": 3.743994951248169,
+      "learning_rate": 2.484567901234568e-05,
+      "loss": 1.2353,
+      "step": 325
+    },
+    {
+      "epoch": 0.1621120889300602,
+      "grad_norm": 7.327773571014404,
+      "learning_rate": 2.6774691358024694e-05,
+      "loss": 1.1858,
+      "step": 350
+    },
+    {
+      "epoch": 0.17369152385363595,
+      "grad_norm": 4.512418270111084,
+      "learning_rate": 2.8703703703703706e-05,
+      "loss": 1.1889,
+      "step": 375
+    },
+    {
+      "epoch": 0.18527095877721167,
+      "grad_norm": 5.212406635284424,
+      "learning_rate": 3.063271604938271e-05,
+      "loss": 1.2394,
+      "step": 400
+    },
+    {
+      "epoch": 0.1968503937007874,
+      "grad_norm": 2.575005531311035,
+      "learning_rate": 3.256172839506173e-05,
+      "loss": 1.3288,
+      "step": 425
+    },
+    {
+      "epoch": 0.20842982862436313,
+      "grad_norm": 5.339956283569336,
+      "learning_rate": 3.449074074074074e-05,
+      "loss": 1.2452,
+      "step": 450
+    },
+    {
+      "epoch": 0.22000926354793887,
+      "grad_norm": 5.540539741516113,
+      "learning_rate": 3.6419753086419754e-05,
+      "loss": 1.1581,
+      "step": 475
+    },
+    {
+      "epoch": 0.2315886984715146,
+      "grad_norm": 4.318514347076416,
+      "learning_rate": 3.8348765432098766e-05,
+      "loss": 1.1733,
+      "step": 500
+    },
+    {
+      "epoch": 0.24316813339509033,
+      "grad_norm": 2.906097173690796,
+      "learning_rate": 4.027777777777778e-05,
+      "loss": 1.1512,
+      "step": 525
+    },
+    {
+      "epoch": 0.2547475683186661,
+      "grad_norm": 1.404222011566162,
+      "learning_rate": 4.220679012345679e-05,
+      "loss": 1.3561,
+      "step": 550
+    },
+    {
+      "epoch": 0.2663270032422418,
+      "grad_norm": 2.1675493717193604,
+      "learning_rate": 4.413580246913581e-05,
+      "loss": 1.1446,
+      "step": 575
+    },
+    {
+      "epoch": 0.2779064381658175,
+      "grad_norm": 4.99737548828125,
+      "learning_rate": 4.6064814814814814e-05,
+      "loss": 1.2864,
+      "step": 600
+    },
+    {
+      "epoch": 0.2894858730893932,
+      "grad_norm": 1.7532799243927002,
+      "learning_rate": 4.799382716049383e-05,
+      "loss": 1.269,
+      "step": 625
+    },
+    {
+      "epoch": 0.301065308012969,
+      "grad_norm": 3.0470633506774902,
+      "learning_rate": 4.9922839506172845e-05,
+      "loss": 1.2261,
+      "step": 650
+    },
+    {
+      "epoch": 0.3126447429365447,
+      "grad_norm": 6.678360939025879,
+      "learning_rate": 4.9794132784354094e-05,
+      "loss": 1.1333,
+      "step": 675
+    },
+    {
+      "epoch": 0.3242241778601204,
+      "grad_norm": 4.9130449295043945,
+      "learning_rate": 4.957968776805627e-05,
+      "loss": 1.1869,
+      "step": 700
+    },
+    {
+      "epoch": 0.33580361278369614,
+      "grad_norm": 4.097753047943115,
+      "learning_rate": 4.936524275175845e-05,
+      "loss": 1.2102,
+      "step": 725
+    },
+    {
+      "epoch": 0.3473830477072719,
+      "grad_norm": 3.7552871704101562,
+      "learning_rate": 4.915079773546063e-05,
+      "loss": 1.1769,
+      "step": 750
+    },
+    {
+      "epoch": 0.3589624826308476,
+      "grad_norm": 5.447041988372803,
+      "learning_rate": 4.893635271916281e-05,
+      "loss": 1.2407,
+      "step": 775
+    },
+    {
+      "epoch": 0.37054191755442334,
+      "grad_norm": 3.041606903076172,
+      "learning_rate": 4.872190770286499e-05,
+      "loss": 1.2184,
+      "step": 800
+    },
+    {
+      "epoch": 0.38212135247799905,
+      "grad_norm": 2.11730694770813,
+      "learning_rate": 4.850746268656717e-05,
+      "loss": 1.107,
+      "step": 825
+    },
+    {
+      "epoch": 0.3937007874015748,
+      "grad_norm": 4.180285453796387,
+      "learning_rate": 4.829301767026935e-05,
+      "loss": 1.2928,
+      "step": 850
+    },
+    {
+      "epoch": 0.40528022232515054,
+      "grad_norm": 11.423721313476562,
+      "learning_rate": 4.807857265397153e-05,
+      "loss": 1.1236,
+      "step": 875
+    },
+    {
+      "epoch": 0.41685965724872626,
+      "grad_norm": 3.7874417304992676,
+      "learning_rate": 4.78641276376737e-05,
+      "loss": 1.1486,
+      "step": 900
+    },
+    {
+      "epoch": 0.42843909217230197,
+      "grad_norm": 3.0819077491760254,
+      "learning_rate": 4.764968262137588e-05,
+      "loss": 1.2213,
+      "step": 925
+    },
+    {
+      "epoch": 0.44001852709587774,
+      "grad_norm": 2.008617401123047,
+      "learning_rate": 4.743523760507806e-05,
+      "loss": 1.1765,
+      "step": 950
+    },
+    {
+      "epoch": 0.45159796201945346,
+      "grad_norm": 2.2871665954589844,
+      "learning_rate": 4.722079258878024e-05,
+      "loss": 1.1775,
+      "step": 975
+    },
+    {
+      "epoch": 0.4631773969430292,
+      "grad_norm": 3.751568555831909,
+      "learning_rate": 4.7006347572482416e-05,
+      "loss": 1.153,
+      "step": 1000
+    },
+    {
+      "epoch": 0.4747568318666049,
+      "grad_norm": 2.8901615142822266,
+      "learning_rate": 4.6791902556184595e-05,
+      "loss": 1.2544,
+      "step": 1025
+    },
+    {
+      "epoch": 0.48633626679018066,
+      "grad_norm": 2.7572152614593506,
+      "learning_rate": 4.6577457539886774e-05,
+      "loss": 1.1789,
+      "step": 1050
+    },
+    {
+      "epoch": 0.4979157017137564,
+      "grad_norm": 2.2316782474517822,
+      "learning_rate": 4.6363012523588953e-05,
+      "loss": 1.2035,
+      "step": 1075
+    },
+    {
+      "epoch": 0.5094951366373321,
+      "grad_norm": 2.4344851970672607,
+      "learning_rate": 4.614856750729113e-05,
+      "loss": 1.1438,
+      "step": 1100
+    },
+    {
+      "epoch": 0.5210745715609079,
+      "grad_norm": 2.271672010421753,
+      "learning_rate": 4.593412249099331e-05,
+      "loss": 1.1742,
+      "step": 1125
+    },
+    {
+      "epoch": 0.5326540064844836,
+      "grad_norm": 4.836185932159424,
+      "learning_rate": 4.571967747469549e-05,
+      "loss": 1.1543,
+      "step": 1150
+    },
+    {
+      "epoch": 0.5442334414080593,
+      "grad_norm": 3.8218131065368652,
+      "learning_rate": 4.550523245839767e-05,
+      "loss": 1.1536,
+      "step": 1175
+    },
+    {
+      "epoch": 0.555812876331635,
+      "grad_norm": 2.6469738483428955,
+      "learning_rate": 4.529078744209985e-05,
+      "loss": 1.1915,
+      "step": 1200
+    },
+    {
+      "epoch": 0.5673923112552107,
+      "grad_norm": 5.130224227905273,
+      "learning_rate": 4.507634242580203e-05,
+      "loss": 1.177,
+      "step": 1225
+    },
+    {
+      "epoch": 0.5789717461787864,
+      "grad_norm": 3.587254047393799,
+      "learning_rate": 4.486189740950421e-05,
+      "loss": 1.2532,
+      "step": 1250
+    },
+    {
+      "epoch": 0.5905511811023622,
+      "grad_norm": 1.91807222366333,
+      "learning_rate": 4.464745239320639e-05,
+      "loss": 1.2081,
+      "step": 1275
+    },
+    {
+      "epoch": 0.602130616025938,
+      "grad_norm": 2.0937275886535645,
+      "learning_rate": 4.4433007376908566e-05,
+      "loss": 1.247,
+      "step": 1300
+    },
+    {
+      "epoch": 0.6137100509495137,
+      "grad_norm": 4.973937511444092,
+      "learning_rate": 4.4218562360610745e-05,
+      "loss": 1.1864,
+      "step": 1325
+    },
+    {
+      "epoch": 0.6252894858730894,
+      "grad_norm": 4.225080490112305,
+      "learning_rate": 4.4004117344312924e-05,
+      "loss": 1.3265,
+      "step": 1350
+    },
+    {
+      "epoch": 0.6368689207966651,
+      "grad_norm": 3.563711166381836,
+      "learning_rate": 4.3789672328015096e-05,
+      "loss": 1.2219,
+      "step": 1375
+    },
+    {
+      "epoch": 0.6484483557202408,
+      "grad_norm": 2.596768856048584,
+      "learning_rate": 4.3575227311717276e-05,
+      "loss": 1.2472,
+      "step": 1400
+    },
+    {
+      "epoch": 0.6600277906438166,
+      "grad_norm": 2.263674020767212,
+      "learning_rate": 4.3360782295419455e-05,
+      "loss": 1.1993,
+      "step": 1425
+    },
+    {
+      "epoch": 0.6716072255673923,
+      "grad_norm": 2.2922544479370117,
+      "learning_rate": 4.3146337279121634e-05,
+      "loss": 1.1379,
+      "step": 1450
+    },
+    {
+      "epoch": 0.683186660490968,
+      "grad_norm": 5.394362449645996,
+      "learning_rate": 4.293189226282381e-05,
+      "loss": 1.1085,
+      "step": 1475
+    },
+    {
+      "epoch": 0.6947660954145438,
+      "grad_norm": 5.802165508270264,
+      "learning_rate": 4.271744724652599e-05,
+      "loss": 1.1417,
+      "step": 1500
+    },
+    {
+      "epoch": 0.7063455303381195,
+      "grad_norm": 3.079671621322632,
+      "learning_rate": 4.250300223022817e-05,
+      "loss": 1.2111,
+      "step": 1525
+    },
+    {
+      "epoch": 0.7179249652616952,
+      "grad_norm": 2.836214303970337,
+      "learning_rate": 4.228855721393035e-05,
+      "loss": 1.2457,
+      "step": 1550
+    },
+    {
+      "epoch": 0.729504400185271,
+      "grad_norm": 6.700901985168457,
+      "learning_rate": 4.207411219763253e-05,
+      "loss": 1.1931,
+      "step": 1575
+    },
+    {
+      "epoch": 0.7410838351088467,
+      "grad_norm": 4.292962551116943,
+      "learning_rate": 4.18596671813347e-05,
+      "loss": 1.2196,
+      "step": 1600
+    },
+    {
+      "epoch": 0.7526632700324224,
+      "grad_norm": 2.4369819164276123,
+      "learning_rate": 4.164522216503689e-05,
+      "loss": 1.1819,
+      "step": 1625
+    },
+    {
+      "epoch": 0.7642427049559981,
+      "grad_norm": 5.2853474617004395,
+      "learning_rate": 4.143077714873907e-05,
+      "loss": 1.1874,
+      "step": 1650
+    },
+    {
+      "epoch": 0.7758221398795738,
+      "grad_norm": 1.9918153285980225,
+      "learning_rate": 4.1216332132441246e-05,
+      "loss": 1.1866,
+      "step": 1675
+    },
+    {
+      "epoch": 0.7874015748031497,
+      "grad_norm": 5.2242231369018555,
+      "learning_rate": 4.1001887116143425e-05,
+      "loss": 1.1775,
+      "step": 1700
+    },
+    {
+      "epoch": 0.7989810097267254,
+      "grad_norm": 2.511507272720337,
+      "learning_rate": 4.0787442099845605e-05,
+      "loss": 1.2418,
+      "step": 1725
+    },
+    {
+      "epoch": 0.8105604446503011,
+      "grad_norm": 2.0094120502471924,
+      "learning_rate": 4.0572997083547784e-05,
+      "loss": 1.2168,
+      "step": 1750
+    },
+    {
+      "epoch": 0.8221398795738768,
+      "grad_norm": 2.8366715908050537,
+      "learning_rate": 4.035855206724996e-05,
+      "loss": 1.217,
+      "step": 1775
+    },
+    {
+      "epoch": 0.8337193144974525,
+      "grad_norm": 4.902674674987793,
+      "learning_rate": 4.014410705095214e-05,
+      "loss": 1.1925,
+      "step": 1800
+    },
+    {
+      "epoch": 0.8452987494210282,
+      "grad_norm": 2.4211857318878174,
+      "learning_rate": 3.992966203465432e-05,
+      "loss": 1.1716,
+      "step": 1825
+    },
+    {
+      "epoch": 0.8568781843446039,
+      "grad_norm": 4.9972381591796875,
+      "learning_rate": 3.9715217018356493e-05,
+      "loss": 1.1871,
+      "step": 1850
+    },
+    {
+      "epoch": 0.8684576192681797,
+      "grad_norm": 3.486520290374756,
+      "learning_rate": 3.950077200205867e-05,
+      "loss": 1.2001,
+      "step": 1875
+    },
+    {
+      "epoch": 0.8800370541917555,
+      "grad_norm": 2.2144150733947754,
+      "learning_rate": 3.928632698576085e-05,
+      "loss": 1.232,
+      "step": 1900
+    },
+    {
+      "epoch": 0.8916164891153312,
+      "grad_norm": 6.714953899383545,
+      "learning_rate": 3.907188196946303e-05,
+      "loss": 1.1767,
+      "step": 1925
+    },
+    {
+      "epoch": 0.9031959240389069,
+      "grad_norm": 6.166855812072754,
+      "learning_rate": 3.885743695316521e-05,
+      "loss": 1.1167,
+      "step": 1950
+    },
+    {
+      "epoch": 0.9147753589624826,
+      "grad_norm": 3.6272430419921875,
+      "learning_rate": 3.864299193686739e-05,
+      "loss": 1.2003,
+      "step": 1975
+    },
+    {
+      "epoch": 0.9263547938860583,
+      "grad_norm": 5.192286014556885,
+      "learning_rate": 3.842854692056957e-05,
+      "loss": 1.2661,
+      "step": 2000
+    },
+    {
+      "epoch": 0.9379342288096341,
+      "grad_norm": 3.80322003364563,
+      "learning_rate": 3.821410190427175e-05,
+      "loss": 1.2087,
+      "step": 2025
+    },
+    {
+      "epoch": 0.9495136637332098,
+      "grad_norm": 2.330951690673828,
+      "learning_rate": 3.799965688797393e-05,
+      "loss": 1.2075,
+      "step": 2050
+    },
+    {
+      "epoch": 0.9610930986567855,
+      "grad_norm": 3.1722116470336914,
+      "learning_rate": 3.77852118716761e-05,
+      "loss": 1.1741,
+      "step": 2075
+    },
+    {
+      "epoch": 0.9726725335803613,
+      "grad_norm": 3.6307098865509033,
+      "learning_rate": 3.757076685537828e-05,
+      "loss": 1.1772,
+      "step": 2100
+    },
+    {
+      "epoch": 0.984251968503937,
+      "grad_norm": 2.525423765182495,
+      "learning_rate": 3.735632183908046e-05,
+      "loss": 1.1213,
+      "step": 2125
+    },
+    {
+      "epoch": 0.9958314034275128,
+      "grad_norm": 2.655104160308838,
+      "learning_rate": 3.7141876822782637e-05,
+      "loss": 1.1762,
+      "step": 2150
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.39710843373493976,
+      "eval_f1_macro": 0.14211797171438428,
+      "eval_f1_micro": 0.39710843373493976,
+      "eval_f1_weighted": 0.22574498061234247,
+      "eval_loss": 1.2174618244171143,
+      "eval_precision_macro": 0.09927710843373494,
+      "eval_precision_micro": 0.39710843373493976,
+      "eval_precision_weighted": 0.15769510814341703,
+      "eval_recall_macro": 0.25,
+      "eval_recall_micro": 0.39710843373493976,
+      "eval_recall_weighted": 0.39710843373493976,
+      "eval_runtime": 4.9841,
+      "eval_samples_per_second": 416.326,
+      "eval_steps_per_second": 26.083,
+      "step": 2159
+    },
+    {
+      "epoch": 1.0074108383510885,
+      "grad_norm": 2.2906293869018555,
+      "learning_rate": 3.6927431806484816e-05,
+      "loss": 1.2513,
+      "step": 2175
+    },
+    {
+      "epoch": 1.0189902732746643,
+      "grad_norm": 4.764555931091309,
+      "learning_rate": 3.6712986790187e-05,
+      "loss": 1.2133,
+      "step": 2200
+    },
+    {
+      "epoch": 1.03056970819824,
+      "grad_norm": 2.511648178100586,
+      "learning_rate": 3.649854177388918e-05,
+      "loss": 1.1227,
+      "step": 2225
+    },
+    {
+      "epoch": 1.0421491431218157,
+      "grad_norm": 5.154523849487305,
+      "learning_rate": 3.628409675759136e-05,
+      "loss": 1.1251,
+      "step": 2250
+    },
+    {
+      "epoch": 1.0537285780453913,
+      "grad_norm": 2.393103837966919,
+      "learning_rate": 3.606965174129354e-05,
+      "loss": 1.2593,
+      "step": 2275
+    },
+    {
+      "epoch": 1.0653080129689672,
+      "grad_norm": 2.7954907417297363,
+      "learning_rate": 3.585520672499571e-05,
+      "loss": 1.2476,
+      "step": 2300
+    },
+    {
+      "epoch": 1.0768874478925428,
+      "grad_norm": 4.258531093597412,
+      "learning_rate": 3.564076170869789e-05,
+      "loss": 1.2369,
+      "step": 2325
+    },
+    {
+      "epoch": 1.0884668828161186,
+      "grad_norm": 4.744079113006592,
+      "learning_rate": 3.542631669240007e-05,
+      "loss": 1.175,
+      "step": 2350
+    },
+    {
+      "epoch": 1.1000463177396944,
+      "grad_norm": 4.467709541320801,
+      "learning_rate": 3.521187167610225e-05,
+      "loss": 1.1174,
+      "step": 2375
+    },
+    {
+      "epoch": 1.11162575266327,
+      "grad_norm": 2.7314538955688477,
+      "learning_rate": 3.499742665980443e-05,
+      "loss": 1.2007,
+      "step": 2400
+    },
+    {
+      "epoch": 1.1232051875868458,
+      "grad_norm": 1.8456259965896606,
+      "learning_rate": 3.478298164350661e-05,
+      "loss": 1.2232,
+      "step": 2425
+    },
+    {
+      "epoch": 1.1347846225104214,
+      "grad_norm": 3.8557677268981934,
+      "learning_rate": 3.4568536627208786e-05,
+      "loss": 1.2338,
+      "step": 2450
+    },
+    {
+      "epoch": 1.1463640574339973,
+      "grad_norm": 3.338961124420166,
+      "learning_rate": 3.4354091610910965e-05,
+      "loss": 1.2004,
+      "step": 2475
+    },
+    {
+      "epoch": 1.1579434923575729,
+      "grad_norm": 2.3821332454681396,
+      "learning_rate": 3.4139646594613145e-05,
+      "loss": 1.1967,
+      "step": 2500
+    },
+    {
+      "epoch": 1.1695229272811487,
+      "grad_norm": 2.296182155609131,
+      "learning_rate": 3.3925201578315324e-05,
+      "loss": 1.1825,
+      "step": 2525
+    },
+    {
+      "epoch": 1.1811023622047245,
+      "grad_norm": 2.287925958633423,
+      "learning_rate": 3.3710756562017496e-05,
+      "loss": 1.1661,
+      "step": 2550
+    },
+    {
+      "epoch": 1.1926817971283001,
+      "grad_norm": 3.0742363929748535,
+      "learning_rate": 3.3496311545719675e-05,
+      "loss": 1.1855,
+      "step": 2575
+    },
+    {
+      "epoch": 1.204261232051876,
+      "grad_norm": 2.94059157371521,
+      "learning_rate": 3.3281866529421854e-05,
+      "loss": 1.186,
+      "step": 2600
+    },
+    {
+      "epoch": 1.2158406669754516,
+      "grad_norm": 5.658060073852539,
+      "learning_rate": 3.3067421513124034e-05,
+      "loss": 1.2018,
+      "step": 2625
+    },
+    {
+      "epoch": 1.2274201018990274,
+      "grad_norm": 4.225418567657471,
+      "learning_rate": 3.285297649682621e-05,
+      "loss": 1.1913,
+      "step": 2650
+    },
+    {
+      "epoch": 1.238999536822603,
+      "grad_norm": 3.121039867401123,
+      "learning_rate": 3.263853148052839e-05,
+      "loss": 1.1655,
+      "step": 2675
+    },
+    {
+      "epoch": 1.2505789717461788,
+      "grad_norm": 2.750720977783203,
+      "learning_rate": 3.242408646423057e-05,
+      "loss": 1.1818,
+      "step": 2700
+    },
+    {
+      "epoch": 1.2621584066697547,
+      "grad_norm": 3.299870491027832,
+      "learning_rate": 3.220964144793275e-05,
+      "loss": 1.2333,
+      "step": 2725
+    },
+    {
+      "epoch": 1.2737378415933303,
+      "grad_norm": 1.8936024904251099,
+      "learning_rate": 3.1995196431634936e-05,
+      "loss": 1.2091,
+      "step": 2750
+    },
+    {
+      "epoch": 1.2853172765169059,
+      "grad_norm": 4.938189506530762,
+      "learning_rate": 3.178075141533711e-05,
+      "loss": 1.2436,
+      "step": 2775
+    },
+    {
+      "epoch": 1.2968967114404817,
+      "grad_norm": 3.0422909259796143,
+      "learning_rate": 3.156630639903929e-05,
+      "loss": 1.2071,
+      "step": 2800
+    },
+    {
+      "epoch": 1.3084761463640575,
+      "grad_norm": 3.3571670055389404,
+      "learning_rate": 3.135186138274147e-05,
+      "loss": 1.1012,
+      "step": 2825
+    },
+    {
+      "epoch": 1.3200555812876331,
+      "grad_norm": 5.697854518890381,
+      "learning_rate": 3.1137416366443646e-05,
+      "loss": 1.1837,
+      "step": 2850
+    },
+    {
+      "epoch": 1.331635016211209,
+      "grad_norm": 2.8652396202087402,
+      "learning_rate": 3.0922971350145825e-05,
+      "loss": 1.2351,
+      "step": 2875
+    },
+    {
+      "epoch": 1.3432144511347845,
+      "grad_norm": 2.0512943267822266,
+      "learning_rate": 3.0708526333848004e-05,
+      "loss": 1.1621,
+      "step": 2900
+    },
+    {
+      "epoch": 1.3547938860583604,
+      "grad_norm": 3.4354703426361084,
+      "learning_rate": 3.0494081317550183e-05,
+      "loss": 1.1627,
+      "step": 2925
+    },
+    {
+      "epoch": 1.366373320981936,
+      "grad_norm": 2.0285403728485107,
+      "learning_rate": 3.0279636301252362e-05,
+      "loss": 1.2202,
+      "step": 2950
+    },
+    {
+      "epoch": 1.3779527559055118,
+      "grad_norm": 4.55291223526001,
+      "learning_rate": 3.006519128495454e-05,
+      "loss": 1.1935,
+      "step": 2975
+    },
+    {
+      "epoch": 1.3895321908290876,
+      "grad_norm": 3.867063045501709,
+      "learning_rate": 2.9850746268656714e-05,
+      "loss": 1.0448,
+      "step": 3000
+    },
+    {
+      "epoch": 1.4011116257526632,
+      "grad_norm": 3.6873021125793457,
+      "learning_rate": 2.9636301252358893e-05,
+      "loss": 1.2339,
+      "step": 3025
+    },
+    {
+      "epoch": 1.412691060676239,
+      "grad_norm": 2.2147438526153564,
+      "learning_rate": 2.9421856236061072e-05,
+      "loss": 1.1809,
+      "step": 3050
+    },
+    {
+      "epoch": 1.4242704955998147,
+      "grad_norm": 2.6401538848876953,
+      "learning_rate": 2.9207411219763255e-05,
+      "loss": 1.1291,
+      "step": 3075
+    },
+    {
+      "epoch": 1.4358499305233905,
+      "grad_norm": 2.2739460468292236,
+      "learning_rate": 2.8992966203465434e-05,
+      "loss": 1.1953,
+      "step": 3100
+    },
+    {
+      "epoch": 1.447429365446966,
+      "grad_norm": 1.2269738912582397,
+      "learning_rate": 2.8778521187167613e-05,
+      "loss": 1.2693,
+      "step": 3125
+    },
+    {
+      "epoch": 1.459008800370542,
+      "grad_norm": 4.429539680480957,
+      "learning_rate": 2.8564076170869792e-05,
+      "loss": 1.2717,
+      "step": 3150
+    },
+    {
+      "epoch": 1.4705882352941178,
+      "grad_norm": 3.294246196746826,
+      "learning_rate": 2.834963115457197e-05,
+      "loss": 1.1788,
+      "step": 3175
+    },
+    {
+      "epoch": 1.4821676702176934,
+      "grad_norm": 5.130248546600342,
+      "learning_rate": 2.813518613827415e-05,
+      "loss": 1.1758,
+      "step": 3200
+    },
+    {
+      "epoch": 1.4937471051412692,
+      "grad_norm": 2.9172611236572266,
+      "learning_rate": 2.792074112197633e-05,
+      "loss": 1.1474,
+      "step": 3225
+    },
+    {
+      "epoch": 1.5053265400648448,
+      "grad_norm": 2.7223055362701416,
+      "learning_rate": 2.7706296105678502e-05,
+      "loss": 1.2265,
+      "step": 3250
+    },
+    {
+      "epoch": 1.5169059749884206,
+      "grad_norm": 3.7259788513183594,
+      "learning_rate": 2.749185108938068e-05,
+      "loss": 1.2508,
+      "step": 3275
+    },
+    {
+      "epoch": 1.5284854099119962,
+      "grad_norm": 1.7198467254638672,
+      "learning_rate": 2.727740607308286e-05,
+      "loss": 1.1782,
+      "step": 3300
+    },
+    {
+      "epoch": 1.540064844835572,
+      "grad_norm": 1.9382416009902954,
+      "learning_rate": 2.706296105678504e-05,
+      "loss": 1.2074,
+      "step": 3325
+    },
+    {
+      "epoch": 1.5516442797591479,
+      "grad_norm": 2.2858059406280518,
+      "learning_rate": 2.6848516040487222e-05,
+      "loss": 1.1473,
+      "step": 3350
+    },
+    {
+      "epoch": 1.5632237146827235,
+      "grad_norm": 1.461945652961731,
+      "learning_rate": 2.66340710241894e-05,
+      "loss": 1.1901,
+      "step": 3375
+    },
+    {
+      "epoch": 1.574803149606299,
+      "grad_norm": 5.549622058868408,
+      "learning_rate": 2.641962600789158e-05,
+      "loss": 1.1618,
+      "step": 3400
+    },
+    {
+      "epoch": 1.586382584529875,
+      "grad_norm": 2.907963991165161,
+      "learning_rate": 2.620518099159376e-05,
+      "loss": 1.1767,
+      "step": 3425
+    },
+    {
+      "epoch": 1.5979620194534507,
+      "grad_norm": 1.6633723974227905,
+      "learning_rate": 2.599073597529594e-05,
+      "loss": 1.1997,
+      "step": 3450
+    },
+    {
+      "epoch": 1.6095414543770263,
+      "grad_norm": 5.5080742835998535,
+      "learning_rate": 2.577629095899811e-05,
+      "loss": 1.1881,
+      "step": 3475
+    },
+    {
+      "epoch": 1.6211208893006022,
+      "grad_norm": 2.5473108291625977,
+      "learning_rate": 2.556184594270029e-05,
+      "loss": 1.15,
+      "step": 3500
+    },
+    {
+      "epoch": 1.632700324224178,
+      "grad_norm": 2.0483057498931885,
+      "learning_rate": 2.534740092640247e-05,
+      "loss": 1.2271,
+      "step": 3525
+    },
+    {
+      "epoch": 1.6442797591477536,
+      "grad_norm": 3.6027846336364746,
+      "learning_rate": 2.513295591010465e-05,
+      "loss": 1.2121,
+      "step": 3550
+    },
+    {
+      "epoch": 1.6558591940713292,
+      "grad_norm": 3.154784917831421,
+      "learning_rate": 2.4918510893806828e-05,
+      "loss": 1.2706,
+      "step": 3575
+    },
+    {
+      "epoch": 1.667438628994905,
+      "grad_norm": 5.780117511749268,
+      "learning_rate": 2.4704065877509007e-05,
+      "loss": 1.1916,
+      "step": 3600
+    },
+    {
+      "epoch": 1.6790180639184809,
+      "grad_norm": 4.522841930389404,
+      "learning_rate": 2.448962086121119e-05,
+      "loss": 1.1344,
+      "step": 3625
+    },
+    {
+      "epoch": 1.6905974988420565,
+      "grad_norm": 2.302856922149658,
+      "learning_rate": 2.4275175844913365e-05,
+      "loss": 1.1316,
+      "step": 3650
+    },
+    {
+      "epoch": 1.7021769337656323,
+      "grad_norm": 3.6142489910125732,
+      "learning_rate": 2.4060730828615544e-05,
+      "loss": 1.1464,
+      "step": 3675
+    },
+    {
+      "epoch": 1.713756368689208,
+      "grad_norm": 3.417003870010376,
+      "learning_rate": 2.3846285812317723e-05,
+      "loss": 1.2767,
+      "step": 3700
+    },
+    {
+      "epoch": 1.7253358036127837,
+      "grad_norm": 1.8820807933807373,
+      "learning_rate": 2.3631840796019903e-05,
+      "loss": 1.1759,
+      "step": 3725
+    },
+    {
+      "epoch": 1.7369152385363593,
+      "grad_norm": 2.0070981979370117,
+      "learning_rate": 2.341739577972208e-05,
+      "loss": 1.2357,
+      "step": 3750
+    },
+    {
+      "epoch": 1.7484946734599351,
+      "grad_norm": 1.9160246849060059,
+      "learning_rate": 2.3202950763424257e-05,
+      "loss": 1.148,
+      "step": 3775
+    },
+    {
+      "epoch": 1.760074108383511,
+      "grad_norm": 2.4420526027679443,
+      "learning_rate": 2.2988505747126437e-05,
+      "loss": 1.239,
+      "step": 3800
+    },
+    {
+      "epoch": 1.7716535433070866,
+      "grad_norm": 1.5695481300354004,
+      "learning_rate": 2.2774060730828616e-05,
+      "loss": 1.1936,
+      "step": 3825
+    },
+    {
+      "epoch": 1.7832329782306622,
+      "grad_norm": 2.090928077697754,
+      "learning_rate": 2.2559615714530795e-05,
+      "loss": 1.176,
+      "step": 3850
+    },
+    {
+      "epoch": 1.7948124131542382,
+      "grad_norm": 2.7507429122924805,
+      "learning_rate": 2.2345170698232974e-05,
+      "loss": 1.1847,
+      "step": 3875
+    },
+    {
+      "epoch": 1.8063918480778138,
+      "grad_norm": 2.7657129764556885,
+      "learning_rate": 2.2130725681935153e-05,
+      "loss": 1.1522,
+      "step": 3900
+    },
+    {
+      "epoch": 1.8179712830013894,
+      "grad_norm": 4.012863636016846,
+      "learning_rate": 2.1916280665637332e-05,
+      "loss": 1.1398,
+      "step": 3925
+    },
+    {
+      "epoch": 1.8295507179249653,
+      "grad_norm": 2.7316641807556152,
+      "learning_rate": 2.170183564933951e-05,
+      "loss": 1.1766,
+      "step": 3950
+    },
+    {
+      "epoch": 1.841130152848541,
+      "grad_norm": 3.0468456745147705,
+      "learning_rate": 2.148739063304169e-05,
+      "loss": 1.2055,
+      "step": 3975
+    },
+    {
+      "epoch": 1.8527095877721167,
+      "grad_norm": 2.0280911922454834,
+      "learning_rate": 2.1272945616743866e-05,
+      "loss": 1.184,
+      "step": 4000
+    },
+    {
+      "epoch": 1.8642890226956923,
+      "grad_norm": 2.5638182163238525,
+      "learning_rate": 2.1058500600446046e-05,
+      "loss": 1.2357,
+      "step": 4025
+    },
+    {
+      "epoch": 1.8758684576192681,
+      "grad_norm": 2.283189535140991,
+      "learning_rate": 2.0844055584148225e-05,
+      "loss": 1.1874,
+      "step": 4050
+    },
+    {
+      "epoch": 1.887447892542844,
+      "grad_norm": 1.3770339488983154,
+      "learning_rate": 2.0629610567850404e-05,
+      "loss": 1.2674,
+      "step": 4075
+    },
+    {
+      "epoch": 1.8990273274664196,
+      "grad_norm": 1.9555165767669678,
+      "learning_rate": 2.0415165551552583e-05,
+      "loss": 1.1922,
+      "step": 4100
+    },
+    {
+      "epoch": 1.9106067623899954,
+      "grad_norm": 3.256969928741455,
+      "learning_rate": 2.0200720535254762e-05,
+      "loss": 1.1587,
+      "step": 4125
+    },
+    {
+      "epoch": 1.9221861973135712,
+      "grad_norm": 1.853826642036438,
+      "learning_rate": 1.998627551895694e-05,
+      "loss": 1.1769,
+      "step": 4150
+    },
+    {
+      "epoch": 1.9337656322371468,
+      "grad_norm": 2.319624662399292,
+      "learning_rate": 1.977183050265912e-05,
+      "loss": 1.1328,
+      "step": 4175
+    },
+    {
+      "epoch": 1.9453450671607224,
+      "grad_norm": 2.720109701156616,
+      "learning_rate": 1.95573854863613e-05,
+      "loss": 1.1331,
+      "step": 4200
+    },
+    {
+      "epoch": 1.9569245020842982,
+      "grad_norm": 4.079843044281006,
+      "learning_rate": 1.9342940470063475e-05,
+      "loss": 1.1479,
+      "step": 4225
+    },
+    {
+      "epoch": 1.968503937007874,
+      "grad_norm": 3.2058353424072266,
+      "learning_rate": 1.9128495453765654e-05,
+      "loss": 1.1891,
+      "step": 4250
+    },
+    {
+      "epoch": 1.9800833719314497,
+      "grad_norm": 2.1098670959472656,
+      "learning_rate": 1.8914050437467834e-05,
+      "loss": 1.1548,
+      "step": 4275
+    },
+    {
+      "epoch": 1.9916628068550255,
+      "grad_norm": 2.4204399585723877,
+      "learning_rate": 1.8699605421170013e-05,
+      "loss": 1.2393,
+      "step": 4300
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.39710843373493976,
+      "eval_f1_macro": 0.14211797171438428,
+      "eval_f1_micro": 0.39710843373493976,
+      "eval_f1_weighted": 0.22574498061234247,
+      "eval_loss": 1.2068006992340088,
+      "eval_precision_macro": 0.09927710843373494,
+      "eval_precision_micro": 0.39710843373493976,
+      "eval_precision_weighted": 0.15769510814341703,
+      "eval_recall_macro": 0.25,
+      "eval_recall_micro": 0.39710843373493976,
+      "eval_recall_weighted": 0.39710843373493976,
+      "eval_runtime": 5.002,
+      "eval_samples_per_second": 414.833,
+      "eval_steps_per_second": 25.99,
+      "step": 4318
+    },
+    {
+      "epoch": 2.0032422417786013,
+      "grad_norm": 4.668676376342773,
+      "learning_rate": 1.8485160404872192e-05,
+      "loss": 1.2158,
+      "step": 4325
+    },
+    {
+      "epoch": 2.014821676702177,
+      "grad_norm": 2.2886784076690674,
+      "learning_rate": 1.8270715388574368e-05,
+      "loss": 1.1545,
+      "step": 4350
+    },
+    {
+      "epoch": 2.0264011116257525,
+      "grad_norm": 4.7184038162231445,
+      "learning_rate": 1.805627037227655e-05,
+      "loss": 1.2628,
+      "step": 4375
+    },
+    {
+      "epoch": 2.0379805465493286,
+      "grad_norm": 1.9082050323486328,
+      "learning_rate": 1.784182535597873e-05,
+      "loss": 1.1398,
+      "step": 4400
+    },
+    {
+      "epoch": 2.049559981472904,
+      "grad_norm": 2.6623945236206055,
+      "learning_rate": 1.762738033968091e-05,
+      "loss": 1.182,
+      "step": 4425
+    },
+    {
+      "epoch": 2.06113941639648,
+      "grad_norm": 5.472179412841797,
+      "learning_rate": 1.7412935323383088e-05,
+      "loss": 1.2342,
+      "step": 4450
+    },
+    {
+      "epoch": 2.0727188513200554,
+      "grad_norm": 2.2513480186462402,
+      "learning_rate": 1.7198490307085263e-05,
+      "loss": 1.219,
+      "step": 4475
+    },
+    {
+      "epoch": 2.0842982862436314,
+      "grad_norm": 2.979966878890991,
+      "learning_rate": 1.6984045290787443e-05,
+      "loss": 1.2084,
+      "step": 4500
+    },
+    {
+      "epoch": 2.095877721167207,
+      "grad_norm": 4.656105041503906,
+      "learning_rate": 1.6769600274489622e-05,
+      "loss": 1.2289,
+      "step": 4525
+    },
+    {
+      "epoch": 2.1074571560907827,
+      "grad_norm": 8.104940414428711,
+      "learning_rate": 1.65551552581918e-05,
+      "loss": 1.2331,
+      "step": 4550
+    },
+    {
+      "epoch": 2.1190365910143587,
+      "grad_norm": 6.20884370803833,
+      "learning_rate": 1.6340710241893977e-05,
+      "loss": 1.2045,
+      "step": 4575
+    },
+    {
+      "epoch": 2.1306160259379343,
+      "grad_norm": 3.645780086517334,
+      "learning_rate": 1.6126265225596156e-05,
+      "loss": 1.1754,
+      "step": 4600
+    },
+    {
+      "epoch": 2.14219546086151,
+      "grad_norm": 2.960564136505127,
+      "learning_rate": 1.5911820209298335e-05,
+      "loss": 1.195,
+      "step": 4625
+    },
+    {
+      "epoch": 2.1537748957850855,
+      "grad_norm": 2.6137890815734863,
+      "learning_rate": 1.5697375193000517e-05,
+      "loss": 1.099,
+      "step": 4650
+    },
+    {
+      "epoch": 2.1653543307086616,
+      "grad_norm": 4.492011547088623,
+      "learning_rate": 1.5482930176702697e-05,
+      "loss": 1.2088,
+      "step": 4675
+    },
+    {
+      "epoch": 2.176933765632237,
+      "grad_norm": 3.653522253036499,
+      "learning_rate": 1.5268485160404872e-05,
+      "loss": 1.2309,
+      "step": 4700
+    },
+    {
+      "epoch": 2.1885132005558128,
+      "grad_norm": 2.6041195392608643,
+      "learning_rate": 1.5054040144107052e-05,
+      "loss": 1.2099,
+      "step": 4725
+    },
+    {
+      "epoch": 2.200092635479389,
+      "grad_norm": 5.426154613494873,
+      "learning_rate": 1.483959512780923e-05,
+      "loss": 1.203,
+      "step": 4750
+    },
+    {
+      "epoch": 2.2116720704029644,
+      "grad_norm": 2.009709358215332,
+      "learning_rate": 1.462515011151141e-05,
+      "loss": 1.1193,
+      "step": 4775
+    },
+    {
+      "epoch": 2.22325150532654,
+      "grad_norm": 2.586091995239258,
+      "learning_rate": 1.4410705095213589e-05,
+      "loss": 1.1109,
+      "step": 4800
+    },
+    {
+      "epoch": 2.2348309402501156,
+      "grad_norm": 1.9446204900741577,
+      "learning_rate": 1.4196260078915766e-05,
+      "loss": 1.2251,
+      "step": 4825
+    },
+    {
+      "epoch": 2.2464103751736917,
+      "grad_norm": 2.2268826961517334,
+      "learning_rate": 1.3981815062617946e-05,
+      "loss": 1.1447,
+      "step": 4850
+    },
+    {
+      "epoch": 2.2579898100972673,
+      "grad_norm": 4.135994911193848,
+      "learning_rate": 1.3767370046320125e-05,
+      "loss": 1.1326,
+      "step": 4875
+    },
+    {
+      "epoch": 2.269569245020843,
+      "grad_norm": 1.3713667392730713,
+      "learning_rate": 1.3552925030022304e-05,
+      "loss": 1.2048,
+      "step": 4900
+    },
+    {
+      "epoch": 2.281148679944419,
+      "grad_norm": 4.801929473876953,
+      "learning_rate": 1.333848001372448e-05,
+      "loss": 1.1726,
+      "step": 4925
+    },
+    {
+      "epoch": 2.2927281148679945,
+      "grad_norm": 1.5106154680252075,
+      "learning_rate": 1.312403499742666e-05,
+      "loss": 1.2191,
+      "step": 4950
+    },
+    {
+      "epoch": 2.30430754979157,
+      "grad_norm": 1.9938125610351562,
+      "learning_rate": 1.290958998112884e-05,
+      "loss": 1.181,
+      "step": 4975
+    },
+    {
+      "epoch": 2.3158869847151458,
+      "grad_norm": 5.004785060882568,
+      "learning_rate": 1.2695144964831019e-05,
+      "loss": 1.1746,
+      "step": 5000
+    },
+    {
+      "epoch": 2.327466419638722,
+      "grad_norm": 2.9320216178894043,
+      "learning_rate": 1.2480699948533196e-05,
+      "loss": 1.2399,
+      "step": 5025
+    },
+    {
+      "epoch": 2.3390458545622974,
+      "grad_norm": 4.154562473297119,
+      "learning_rate": 1.2266254932235375e-05,
+      "loss": 1.2275,
+      "step": 5050
+    },
+    {
+      "epoch": 2.350625289485873,
+      "grad_norm": 2.5340206623077393,
+      "learning_rate": 1.2051809915937553e-05,
+      "loss": 1.1873,
+      "step": 5075
+    },
+    {
+      "epoch": 2.362204724409449,
+      "grad_norm": 2.7467281818389893,
+      "learning_rate": 1.1837364899639734e-05,
+      "loss": 1.2125,
+      "step": 5100
+    },
+    {
+      "epoch": 2.3737841593330247,
+      "grad_norm": 2.1378886699676514,
+      "learning_rate": 1.1622919883341913e-05,
+      "loss": 1.1484,
+      "step": 5125
+    },
+    {
+      "epoch": 2.3853635942566003,
+      "grad_norm": 1.7250367403030396,
+      "learning_rate": 1.140847486704409e-05,
+      "loss": 1.1293,
+      "step": 5150
+    },
+    {
+      "epoch": 2.396943029180176,
+      "grad_norm": 4.303859233856201,
+      "learning_rate": 1.119402985074627e-05,
+      "loss": 1.2065,
+      "step": 5175
+    },
+    {
+      "epoch": 2.408522464103752,
+      "grad_norm": 4.186789035797119,
+      "learning_rate": 1.0979584834448447e-05,
+      "loss": 1.1573,
+      "step": 5200
+    },
+    {
+      "epoch": 2.4201018990273275,
+      "grad_norm": 2.763376474380493,
+      "learning_rate": 1.0765139818150626e-05,
+      "loss": 1.2093,
+      "step": 5225
+    },
+    {
+      "epoch": 2.431681333950903,
+      "grad_norm": 4.167290210723877,
+      "learning_rate": 1.0550694801852805e-05,
+      "loss": 1.2073,
+      "step": 5250
+    },
+    {
+      "epoch": 2.4432607688744787,
+      "grad_norm": 4.77427864074707,
+      "learning_rate": 1.0336249785554984e-05,
+      "loss": 1.2074,
+      "step": 5275
+    },
+    {
+      "epoch": 2.454840203798055,
+      "grad_norm": 2.2546989917755127,
+      "learning_rate": 1.0121804769257163e-05,
+      "loss": 1.2089,
+      "step": 5300
+    },
+    {
+      "epoch": 2.4664196387216304,
+      "grad_norm": 2.603929281234741,
+      "learning_rate": 9.907359752959341e-06,
+      "loss": 1.1774,
+      "step": 5325
+    },
+    {
+      "epoch": 2.477999073645206,
+      "grad_norm": 2.2647411823272705,
+      "learning_rate": 9.69291473666152e-06,
+      "loss": 1.1509,
+      "step": 5350
+    },
+    {
+      "epoch": 2.4895785085687816,
+      "grad_norm": 2.784689426422119,
+      "learning_rate": 9.4784697203637e-06,
+      "loss": 1.1545,
+      "step": 5375
+    },
+    {
+      "epoch": 2.5011579434923576,
+      "grad_norm": 3.1189873218536377,
+      "learning_rate": 9.264024704065878e-06,
+      "loss": 1.1353,
+      "step": 5400
+    },
+    {
+      "epoch": 2.5127373784159333,
+      "grad_norm": 3.5311825275421143,
+      "learning_rate": 9.049579687768056e-06,
+      "loss": 1.2599,
+      "step": 5425
+    },
+    {
+      "epoch": 2.5243168133395093,
+      "grad_norm": 1.4731173515319824,
+      "learning_rate": 8.835134671470235e-06,
+      "loss": 1.127,
+      "step": 5450
+    },
+    {
+      "epoch": 2.535896248263085,
+      "grad_norm": 2.797048807144165,
+      "learning_rate": 8.620689655172414e-06,
+      "loss": 1.0877,
+      "step": 5475
+    },
+    {
+      "epoch": 2.5474756831866605,
+      "grad_norm": 3.5394978523254395,
+      "learning_rate": 8.406244638874593e-06,
+      "loss": 1.1834,
+      "step": 5500
+    },
+    {
+      "epoch": 2.559055118110236,
+      "grad_norm": 4.206399440765381,
+      "learning_rate": 8.191799622576772e-06,
+      "loss": 1.1239,
+      "step": 5525
+    },
+    {
+      "epoch": 2.5706345530338117,
+      "grad_norm": 2.8601016998291016,
+      "learning_rate": 7.97735460627895e-06,
+      "loss": 1.217,
+      "step": 5550
+    },
+    {
+      "epoch": 2.5822139879573878,
+      "grad_norm": 3.3993771076202393,
+      "learning_rate": 7.762909589981129e-06,
+      "loss": 1.1876,
+      "step": 5575
+    },
+    {
+      "epoch": 2.5937934228809634,
+      "grad_norm": 2.0492095947265625,
+      "learning_rate": 7.548464573683307e-06,
+      "loss": 1.1923,
+      "step": 5600
+    },
+    {
+      "epoch": 2.605372857804539,
+      "grad_norm": 3.045842170715332,
+      "learning_rate": 7.3340195573854865e-06,
+      "loss": 1.2492,
+      "step": 5625
+    },
+    {
+      "epoch": 2.616952292728115,
+      "grad_norm": 1.7433972358703613,
+      "learning_rate": 7.119574541087666e-06,
+      "loss": 1.2652,
+      "step": 5650
+    },
+    {
+      "epoch": 2.6285317276516906,
+      "grad_norm": 2.3767240047454834,
+      "learning_rate": 6.905129524789844e-06,
+      "loss": 1.2284,
+      "step": 5675
+    },
+    {
+      "epoch": 2.6401111625752662,
+      "grad_norm": 4.228554725646973,
+      "learning_rate": 6.690684508492023e-06,
+      "loss": 1.1705,
+      "step": 5700
+    },
+    {
+      "epoch": 2.651690597498842,
+      "grad_norm": 4.027316570281982,
+      "learning_rate": 6.476239492194201e-06,
+      "loss": 1.1751,
+      "step": 5725
+    },
+    {
+      "epoch": 2.663270032422418,
+      "grad_norm": 2.5308732986450195,
+      "learning_rate": 6.2617944758963805e-06,
+      "loss": 1.2038,
+      "step": 5750
+    },
+    {
+      "epoch": 2.6748494673459935,
+      "grad_norm": 2.849998712539673,
+      "learning_rate": 6.04734945959856e-06,
+      "loss": 1.13,
+      "step": 5775
+    },
+    {
+      "epoch": 2.686428902269569,
+      "grad_norm": 1.7784459590911865,
+      "learning_rate": 5.832904443300738e-06,
+      "loss": 1.2206,
+      "step": 5800
+    },
+    {
+      "epoch": 2.698008337193145,
+      "grad_norm": 5.856213569641113,
+      "learning_rate": 5.618459427002916e-06,
+      "loss": 1.1501,
+      "step": 5825
+    },
+    {
+      "epoch": 2.7095877721167207,
+      "grad_norm": 2.401578664779663,
+      "learning_rate": 5.4040144107050954e-06,
+      "loss": 1.2501,
+      "step": 5850
+    },
+    {
+      "epoch": 2.7211672070402964,
+      "grad_norm": 2.7738897800445557,
+      "learning_rate": 5.189569394407274e-06,
+      "loss": 1.1535,
+      "step": 5875
+    },
+    {
+      "epoch": 2.732746641963872,
+      "grad_norm": 7.125967979431152,
+      "learning_rate": 4.975124378109453e-06,
+      "loss": 1.2025,
+      "step": 5900
+    },
+    {
+      "epoch": 2.744326076887448,
+      "grad_norm": 1.4013216495513916,
+      "learning_rate": 4.760679361811632e-06,
+      "loss": 1.2028,
+      "step": 5925
+    },
+    {
+      "epoch": 2.7559055118110236,
+      "grad_norm": 6.033567905426025,
+      "learning_rate": 4.54623434551381e-06,
+      "loss": 1.1888,
+      "step": 5950
+    },
+    {
+      "epoch": 2.767484946734599,
+      "grad_norm": 2.6083405017852783,
+      "learning_rate": 4.3317893292159895e-06,
+      "loss": 1.1546,
+      "step": 5975
+    },
+    {
+      "epoch": 2.7790643816581753,
+      "grad_norm": 3.179180860519409,
+      "learning_rate": 4.117344312918168e-06,
+      "loss": 1.2136,
+      "step": 6000
+    },
+    {
+      "epoch": 2.790643816581751,
+      "grad_norm": 1.8782720565795898,
+      "learning_rate": 3.902899296620346e-06,
+      "loss": 1.116,
+      "step": 6025
+    },
+    {
+      "epoch": 2.8022232515053265,
+      "grad_norm": 1.5270411968231201,
+      "learning_rate": 3.6884542803225257e-06,
+      "loss": 1.1756,
+      "step": 6050
+    },
+    {
+      "epoch": 2.813802686428902,
+      "grad_norm": 2.8853325843811035,
+      "learning_rate": 3.4740092640247044e-06,
+      "loss": 1.1418,
+      "step": 6075
+    },
+    {
+      "epoch": 2.825382121352478,
+      "grad_norm": 4.495181560516357,
+      "learning_rate": 3.259564247726883e-06,
+      "loss": 1.2573,
+      "step": 6100
+    },
+    {
+      "epoch": 2.8369615562760537,
+      "grad_norm": 3.329115390777588,
+      "learning_rate": 3.045119231429062e-06,
+      "loss": 1.1984,
+      "step": 6125
+    },
+    {
+      "epoch": 2.8485409911996293,
+      "grad_norm": 2.650596857070923,
+      "learning_rate": 2.8306742151312406e-06,
+      "loss": 1.1734,
+      "step": 6150
+    },
+    {
+      "epoch": 2.8601204261232054,
+      "grad_norm": 2.172297239303589,
+      "learning_rate": 2.616229198833419e-06,
+      "loss": 1.1968,
+      "step": 6175
+    },
+    {
+      "epoch": 2.871699861046781,
+      "grad_norm": 2.388245105743408,
+      "learning_rate": 2.401784182535598e-06,
+      "loss": 1.1787,
+      "step": 6200
+    },
+    {
+      "epoch": 2.8832792959703566,
+      "grad_norm": 2.751389265060425,
+      "learning_rate": 2.1873391662377767e-06,
+      "loss": 1.1884,
+      "step": 6225
+    },
+    {
+      "epoch": 2.894858730893932,
+      "grad_norm": 3.0782809257507324,
+      "learning_rate": 1.9728941499399555e-06,
+      "loss": 1.1554,
+      "step": 6250
+    },
+    {
+      "epoch": 2.9064381658175082,
+      "grad_norm": 2.73215913772583,
+      "learning_rate": 1.7584491336421344e-06,
+      "loss": 1.2045,
+      "step": 6275
+    },
+    {
+      "epoch": 2.918017600741084,
+      "grad_norm": 2.4149420261383057,
+      "learning_rate": 1.544004117344313e-06,
+      "loss": 1.1549,
+      "step": 6300
+    },
+    {
+      "epoch": 2.9295970356646595,
+      "grad_norm": 2.5072360038757324,
+      "learning_rate": 1.3295591010464916e-06,
+      "loss": 1.1785,
+      "step": 6325
+    },
+    {
+      "epoch": 2.9411764705882355,
+      "grad_norm": 3.0238680839538574,
+      "learning_rate": 1.1151140847486706e-06,
+      "loss": 1.1882,
+      "step": 6350
+    },
+    {
+      "epoch": 2.952755905511811,
+      "grad_norm": 1.7855486869812012,
+      "learning_rate": 9.006690684508493e-07,
+      "loss": 1.1809,
+      "step": 6375
+    },
+    {
+      "epoch": 2.9643353404353867,
+      "grad_norm": 5.080151557922363,
+      "learning_rate": 6.862240521530279e-07,
+      "loss": 1.2288,
+      "step": 6400
+    },
+    {
+      "epoch": 2.9759147753589623,
+      "grad_norm": 2.6682169437408447,
+      "learning_rate": 4.717790358552067e-07,
+      "loss": 1.2158,
+      "step": 6425
+    },
+    {
+      "epoch": 2.9874942102825384,
+      "grad_norm": 1.6711657047271729,
+      "learning_rate": 2.573340195573855e-07,
+      "loss": 1.1705,
+      "step": 6450
+    },
+    {
+      "epoch": 2.999073645206114,
+      "grad_norm": 3.766089916229248,
+      "learning_rate": 4.2889003259564246e-08,
+      "loss": 1.1834,
+      "step": 6475
+    },
+    {
+      "epoch": 3.0,
+      "eval_accuracy": 0.39710843373493976,
+      "eval_f1_macro": 0.14211797171438428,
+      "eval_f1_micro": 0.39710843373493976,
+      "eval_f1_weighted": 0.22574498061234247,
+      "eval_loss": 1.2045246362686157,
+      "eval_precision_macro": 0.09927710843373494,
+      "eval_precision_micro": 0.39710843373493976,
+      "eval_precision_weighted": 0.15769510814341703,
+      "eval_recall_macro": 0.25,
+      "eval_recall_micro": 0.39710843373493976,
+      "eval_recall_weighted": 0.39710843373493976,
+      "eval_runtime": 4.9488,
+      "eval_samples_per_second": 419.296,
+      "eval_steps_per_second": 26.269,
+      "step": 6477
+    }
+  ],
+  "logging_steps": 25,
+  "max_steps": 6477,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "EarlyStoppingCallback": {
+      "args": {
+        "early_stopping_patience": 5,
+        "early_stopping_threshold": 0.01
+      },
+      "attributes": {
+        "early_stopping_patience_counter": 0
+      }
+    },
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 3407612468023296.0,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-6477/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:06a543cf5bd52dcc2d1f0e6733649fac43cdf55d22872e60b4567f3a8e4671e9
+size 5240

config.json ADDED Viewed

	@@ -0,0 +1,41 @@

+{
+  "_name_or_path": "FacebookAI/roberta-base",
+  "_num_labels": 4,
+  "architectures": [
+    "RobertaForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "bos_token_id": 0,
+  "classifier_dropout": null,
+  "eos_token_id": 2,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "gb",
+    "1": "gc",
+    "2": "gf",
+    "3": "u1"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "gb": 0,
+    "gc": 1,
+    "gf": 2,
+    "u1": 3
+  },
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 514,
+  "model_type": "roberta",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 1,
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.44.1",
+  "type_vocab_size": 1,
+  "use_cache": true,
+  "vocab_size": 50265
+}

merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:73a17fb75bfe03e6850ef726c0196e43a21f4930183cfc6d82d383c0ad389b13
+size 498618976

runs/Aug26_07-01-12_r-riken01-flan-t5-base-c75wamlq-d893c-y5aep/events.out.tfevents.1724655673.r-riken01-flan-t5-base-c75wamlq-d893c-y5aep.111.0 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9c93bb8f585f4c03e976ce2fba0f4a7bf40ef076e0504c98eee61b66592ac10e
-size 54716

 version https://git-lfs.github.com/spec/v1
+oid sha256:5a1a63056cbd133d4c3dd6bcda3a803b32deae3354766c5ebf194c2e26b29f5a
+size 62655

runs/Aug26_07-01-12_r-riken01-flan-t5-base-c75wamlq-d893c-y5aep/events.out.tfevents.1724656380.r-riken01-flan-t5-base-c75wamlq-d893c-y5aep.111.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:66e9e22e6283b45e5cfc2cb7226aa1b371579820929eabaadbeb06edbe9db34c
+size 921

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,15 @@

+{
+  "bos_token": "<s>",
+  "cls_token": "<s>",
+  "eos_token": "</s>",
+  "mask_token": {
+    "content": "<mask>",
+    "lstrip": true,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<pad>",
+  "sep_token": "</s>",
+  "unk_token": "<unk>"
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "50264": {
+      "content": "<mask>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "<s>",
+  "eos_token": "</s>",
+  "errors": "replace",
+  "mask_token": "<mask>",
+  "model_max_length": 512,
+  "pad_token": "<pad>",
+  "sep_token": "</s>",
+  "tokenizer_class": "RobertaTokenizer",
+  "trim_offsets": true,
+  "unk_token": "<unk>"
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:06a543cf5bd52dcc2d1f0e6733649fac43cdf55d22872e60b4567f3a8e4671e9
+size 5240

training_params.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+    "data_path": "TrustPilot-balanced-location-roberta/autotrain-data",
+    "model": "FacebookAI/roberta-base",
+    "lr": 5e-05,
+    "epochs": 3,
+    "max_seq_length": 128,
+    "batch_size": 8,
+    "warmup_ratio": 0.1,
+    "gradient_accumulation": 1,
+    "optimizer": "adamw_torch",
+    "scheduler": "linear",
+    "weight_decay": 0.0,
+    "max_grad_norm": 1.0,
+    "seed": 42,
+    "train_split": "train",
+    "valid_split": "validation",
+    "text_column": "autotrain_text",
+    "target_column": "autotrain_label",
+    "logging_steps": -1,
+    "project_name": "TrustPilot-balanced-location-roberta",
+    "auto_find_batch_size": false,
+    "mixed_precision": "fp16",
+    "save_total_limit": 1,
+    "push_to_hub": true,
+    "eval_strategy": "epoch",
+    "username": "riken01",
+    "log": "tensorboard",
+    "early_stopping_patience": 5,
+    "early_stopping_threshold": 0.01
+}

vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff