riken01 commited on
Commit
7564718
1 Parent(s): 674f6c1

Upload folder using huggingface_hub

Browse files
README.md ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ ---
3
+ tags:
4
+ - autotrain
5
+ - text-classification
6
+ base_model: FacebookAI/roberta-base
7
+ widget:
8
+ - text: "I love AutoTrain"
9
+ ---
10
+
11
+ # Model Trained Using AutoTrain
12
+
13
+ - Problem type: Text Classification
14
+
15
+ ## Validation Metrics
16
+ loss: 1.2045246362686157
17
+
18
+ f1_macro: 0.14211797171438428
19
+
20
+ f1_micro: 0.39710843373493976
21
+
22
+ f1_weighted: 0.22574498061234247
23
+
24
+ precision_macro: 0.09927710843373494
25
+
26
+ precision_micro: 0.39710843373493976
27
+
28
+ precision_weighted: 0.15769510814341703
29
+
30
+ recall_macro: 0.25
31
+
32
+ recall_micro: 0.39710843373493976
33
+
34
+ recall_weighted: 0.39710843373493976
35
+
36
+ accuracy: 0.39710843373493976
checkpoint-6477/config.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "FacebookAI/roberta-base",
3
+ "_num_labels": 4,
4
+ "architectures": [
5
+ "RobertaForSequenceClassification"
6
+ ],
7
+ "attention_probs_dropout_prob": 0.1,
8
+ "bos_token_id": 0,
9
+ "classifier_dropout": null,
10
+ "eos_token_id": 2,
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.1,
13
+ "hidden_size": 768,
14
+ "id2label": {
15
+ "0": "gb",
16
+ "1": "gc",
17
+ "2": "gf",
18
+ "3": "u1"
19
+ },
20
+ "initializer_range": 0.02,
21
+ "intermediate_size": 3072,
22
+ "label2id": {
23
+ "gb": 0,
24
+ "gc": 1,
25
+ "gf": 2,
26
+ "u1": 3
27
+ },
28
+ "layer_norm_eps": 1e-05,
29
+ "max_position_embeddings": 514,
30
+ "model_type": "roberta",
31
+ "num_attention_heads": 12,
32
+ "num_hidden_layers": 12,
33
+ "pad_token_id": 1,
34
+ "position_embedding_type": "absolute",
35
+ "problem_type": "single_label_classification",
36
+ "torch_dtype": "float32",
37
+ "transformers_version": "4.44.1",
38
+ "type_vocab_size": 1,
39
+ "use_cache": true,
40
+ "vocab_size": 50265
41
+ }
checkpoint-6477/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73a17fb75bfe03e6850ef726c0196e43a21f4930183cfc6d82d383c0ad389b13
3
+ size 498618976
checkpoint-6477/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d69bfd612fb760805383e21dd5994fbe3e53dff4b59a7c24736e4f5056824f6
3
+ size 997357818
checkpoint-6477/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d06602585737abe14a2a3dbbc8dfc0666b3b16b8640b7bf48d402f2643feeac
3
+ size 14244
checkpoint-6477/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d64402b7afed544df7b457720d32a0494798414dbdec4dd716643517114c5d28
3
+ size 1064
checkpoint-6477/trainer_state.json ADDED
@@ -0,0 +1,1909 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 1.2045246362686157,
3
+ "best_model_checkpoint": "TrustPilot-balanced-location-roberta/checkpoint-6477",
4
+ "epoch": 3.0,
5
+ "eval_steps": 500,
6
+ "global_step": 6477,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.01157943492357573,
13
+ "grad_norm": 4.521674633026123,
14
+ "learning_rate": 1.9290123456790124e-06,
15
+ "loss": 1.4326,
16
+ "step": 25
17
+ },
18
+ {
19
+ "epoch": 0.02315886984715146,
20
+ "grad_norm": 3.7206783294677734,
21
+ "learning_rate": 3.858024691358025e-06,
22
+ "loss": 1.3881,
23
+ "step": 50
24
+ },
25
+ {
26
+ "epoch": 0.03473830477072719,
27
+ "grad_norm": 5.430587291717529,
28
+ "learning_rate": 5.787037037037038e-06,
29
+ "loss": 1.3384,
30
+ "step": 75
31
+ },
32
+ {
33
+ "epoch": 0.04631773969430292,
34
+ "grad_norm": 5.371946334838867,
35
+ "learning_rate": 7.63888888888889e-06,
36
+ "loss": 1.2747,
37
+ "step": 100
38
+ },
39
+ {
40
+ "epoch": 0.05789717461787865,
41
+ "grad_norm": 7.290340423583984,
42
+ "learning_rate": 9.5679012345679e-06,
43
+ "loss": 1.1889,
44
+ "step": 125
45
+ },
46
+ {
47
+ "epoch": 0.06947660954145438,
48
+ "grad_norm": 7.079736232757568,
49
+ "learning_rate": 1.1496913580246914e-05,
50
+ "loss": 1.2006,
51
+ "step": 150
52
+ },
53
+ {
54
+ "epoch": 0.0810560444650301,
55
+ "grad_norm": 8.647546768188477,
56
+ "learning_rate": 1.3425925925925928e-05,
57
+ "loss": 1.1498,
58
+ "step": 175
59
+ },
60
+ {
61
+ "epoch": 0.09263547938860583,
62
+ "grad_norm": 12.410360336303711,
63
+ "learning_rate": 1.5354938271604938e-05,
64
+ "loss": 1.195,
65
+ "step": 200
66
+ },
67
+ {
68
+ "epoch": 0.10421491431218156,
69
+ "grad_norm": 6.746768951416016,
70
+ "learning_rate": 1.728395061728395e-05,
71
+ "loss": 1.1531,
72
+ "step": 225
73
+ },
74
+ {
75
+ "epoch": 0.1157943492357573,
76
+ "grad_norm": 4.067171573638916,
77
+ "learning_rate": 1.91358024691358e-05,
78
+ "loss": 1.2655,
79
+ "step": 250
80
+ },
81
+ {
82
+ "epoch": 0.12737378415933304,
83
+ "grad_norm": 8.56696891784668,
84
+ "learning_rate": 2.1064814814814816e-05,
85
+ "loss": 1.1576,
86
+ "step": 275
87
+ },
88
+ {
89
+ "epoch": 0.13895321908290875,
90
+ "grad_norm": 5.337764263153076,
91
+ "learning_rate": 2.2916666666666667e-05,
92
+ "loss": 1.1628,
93
+ "step": 300
94
+ },
95
+ {
96
+ "epoch": 0.1505326540064845,
97
+ "grad_norm": 3.743994951248169,
98
+ "learning_rate": 2.484567901234568e-05,
99
+ "loss": 1.2353,
100
+ "step": 325
101
+ },
102
+ {
103
+ "epoch": 0.1621120889300602,
104
+ "grad_norm": 7.327773571014404,
105
+ "learning_rate": 2.6774691358024694e-05,
106
+ "loss": 1.1858,
107
+ "step": 350
108
+ },
109
+ {
110
+ "epoch": 0.17369152385363595,
111
+ "grad_norm": 4.512418270111084,
112
+ "learning_rate": 2.8703703703703706e-05,
113
+ "loss": 1.1889,
114
+ "step": 375
115
+ },
116
+ {
117
+ "epoch": 0.18527095877721167,
118
+ "grad_norm": 5.212406635284424,
119
+ "learning_rate": 3.063271604938271e-05,
120
+ "loss": 1.2394,
121
+ "step": 400
122
+ },
123
+ {
124
+ "epoch": 0.1968503937007874,
125
+ "grad_norm": 2.575005531311035,
126
+ "learning_rate": 3.256172839506173e-05,
127
+ "loss": 1.3288,
128
+ "step": 425
129
+ },
130
+ {
131
+ "epoch": 0.20842982862436313,
132
+ "grad_norm": 5.339956283569336,
133
+ "learning_rate": 3.449074074074074e-05,
134
+ "loss": 1.2452,
135
+ "step": 450
136
+ },
137
+ {
138
+ "epoch": 0.22000926354793887,
139
+ "grad_norm": 5.540539741516113,
140
+ "learning_rate": 3.6419753086419754e-05,
141
+ "loss": 1.1581,
142
+ "step": 475
143
+ },
144
+ {
145
+ "epoch": 0.2315886984715146,
146
+ "grad_norm": 4.318514347076416,
147
+ "learning_rate": 3.8348765432098766e-05,
148
+ "loss": 1.1733,
149
+ "step": 500
150
+ },
151
+ {
152
+ "epoch": 0.24316813339509033,
153
+ "grad_norm": 2.906097173690796,
154
+ "learning_rate": 4.027777777777778e-05,
155
+ "loss": 1.1512,
156
+ "step": 525
157
+ },
158
+ {
159
+ "epoch": 0.2547475683186661,
160
+ "grad_norm": 1.404222011566162,
161
+ "learning_rate": 4.220679012345679e-05,
162
+ "loss": 1.3561,
163
+ "step": 550
164
+ },
165
+ {
166
+ "epoch": 0.2663270032422418,
167
+ "grad_norm": 2.1675493717193604,
168
+ "learning_rate": 4.413580246913581e-05,
169
+ "loss": 1.1446,
170
+ "step": 575
171
+ },
172
+ {
173
+ "epoch": 0.2779064381658175,
174
+ "grad_norm": 4.99737548828125,
175
+ "learning_rate": 4.6064814814814814e-05,
176
+ "loss": 1.2864,
177
+ "step": 600
178
+ },
179
+ {
180
+ "epoch": 0.2894858730893932,
181
+ "grad_norm": 1.7532799243927002,
182
+ "learning_rate": 4.799382716049383e-05,
183
+ "loss": 1.269,
184
+ "step": 625
185
+ },
186
+ {
187
+ "epoch": 0.301065308012969,
188
+ "grad_norm": 3.0470633506774902,
189
+ "learning_rate": 4.9922839506172845e-05,
190
+ "loss": 1.2261,
191
+ "step": 650
192
+ },
193
+ {
194
+ "epoch": 0.3126447429365447,
195
+ "grad_norm": 6.678360939025879,
196
+ "learning_rate": 4.9794132784354094e-05,
197
+ "loss": 1.1333,
198
+ "step": 675
199
+ },
200
+ {
201
+ "epoch": 0.3242241778601204,
202
+ "grad_norm": 4.9130449295043945,
203
+ "learning_rate": 4.957968776805627e-05,
204
+ "loss": 1.1869,
205
+ "step": 700
206
+ },
207
+ {
208
+ "epoch": 0.33580361278369614,
209
+ "grad_norm": 4.097753047943115,
210
+ "learning_rate": 4.936524275175845e-05,
211
+ "loss": 1.2102,
212
+ "step": 725
213
+ },
214
+ {
215
+ "epoch": 0.3473830477072719,
216
+ "grad_norm": 3.7552871704101562,
217
+ "learning_rate": 4.915079773546063e-05,
218
+ "loss": 1.1769,
219
+ "step": 750
220
+ },
221
+ {
222
+ "epoch": 0.3589624826308476,
223
+ "grad_norm": 5.447041988372803,
224
+ "learning_rate": 4.893635271916281e-05,
225
+ "loss": 1.2407,
226
+ "step": 775
227
+ },
228
+ {
229
+ "epoch": 0.37054191755442334,
230
+ "grad_norm": 3.041606903076172,
231
+ "learning_rate": 4.872190770286499e-05,
232
+ "loss": 1.2184,
233
+ "step": 800
234
+ },
235
+ {
236
+ "epoch": 0.38212135247799905,
237
+ "grad_norm": 2.11730694770813,
238
+ "learning_rate": 4.850746268656717e-05,
239
+ "loss": 1.107,
240
+ "step": 825
241
+ },
242
+ {
243
+ "epoch": 0.3937007874015748,
244
+ "grad_norm": 4.180285453796387,
245
+ "learning_rate": 4.829301767026935e-05,
246
+ "loss": 1.2928,
247
+ "step": 850
248
+ },
249
+ {
250
+ "epoch": 0.40528022232515054,
251
+ "grad_norm": 11.423721313476562,
252
+ "learning_rate": 4.807857265397153e-05,
253
+ "loss": 1.1236,
254
+ "step": 875
255
+ },
256
+ {
257
+ "epoch": 0.41685965724872626,
258
+ "grad_norm": 3.7874417304992676,
259
+ "learning_rate": 4.78641276376737e-05,
260
+ "loss": 1.1486,
261
+ "step": 900
262
+ },
263
+ {
264
+ "epoch": 0.42843909217230197,
265
+ "grad_norm": 3.0819077491760254,
266
+ "learning_rate": 4.764968262137588e-05,
267
+ "loss": 1.2213,
268
+ "step": 925
269
+ },
270
+ {
271
+ "epoch": 0.44001852709587774,
272
+ "grad_norm": 2.008617401123047,
273
+ "learning_rate": 4.743523760507806e-05,
274
+ "loss": 1.1765,
275
+ "step": 950
276
+ },
277
+ {
278
+ "epoch": 0.45159796201945346,
279
+ "grad_norm": 2.2871665954589844,
280
+ "learning_rate": 4.722079258878024e-05,
281
+ "loss": 1.1775,
282
+ "step": 975
283
+ },
284
+ {
285
+ "epoch": 0.4631773969430292,
286
+ "grad_norm": 3.751568555831909,
287
+ "learning_rate": 4.7006347572482416e-05,
288
+ "loss": 1.153,
289
+ "step": 1000
290
+ },
291
+ {
292
+ "epoch": 0.4747568318666049,
293
+ "grad_norm": 2.8901615142822266,
294
+ "learning_rate": 4.6791902556184595e-05,
295
+ "loss": 1.2544,
296
+ "step": 1025
297
+ },
298
+ {
299
+ "epoch": 0.48633626679018066,
300
+ "grad_norm": 2.7572152614593506,
301
+ "learning_rate": 4.6577457539886774e-05,
302
+ "loss": 1.1789,
303
+ "step": 1050
304
+ },
305
+ {
306
+ "epoch": 0.4979157017137564,
307
+ "grad_norm": 2.2316782474517822,
308
+ "learning_rate": 4.6363012523588953e-05,
309
+ "loss": 1.2035,
310
+ "step": 1075
311
+ },
312
+ {
313
+ "epoch": 0.5094951366373321,
314
+ "grad_norm": 2.4344851970672607,
315
+ "learning_rate": 4.614856750729113e-05,
316
+ "loss": 1.1438,
317
+ "step": 1100
318
+ },
319
+ {
320
+ "epoch": 0.5210745715609079,
321
+ "grad_norm": 2.271672010421753,
322
+ "learning_rate": 4.593412249099331e-05,
323
+ "loss": 1.1742,
324
+ "step": 1125
325
+ },
326
+ {
327
+ "epoch": 0.5326540064844836,
328
+ "grad_norm": 4.836185932159424,
329
+ "learning_rate": 4.571967747469549e-05,
330
+ "loss": 1.1543,
331
+ "step": 1150
332
+ },
333
+ {
334
+ "epoch": 0.5442334414080593,
335
+ "grad_norm": 3.8218131065368652,
336
+ "learning_rate": 4.550523245839767e-05,
337
+ "loss": 1.1536,
338
+ "step": 1175
339
+ },
340
+ {
341
+ "epoch": 0.555812876331635,
342
+ "grad_norm": 2.6469738483428955,
343
+ "learning_rate": 4.529078744209985e-05,
344
+ "loss": 1.1915,
345
+ "step": 1200
346
+ },
347
+ {
348
+ "epoch": 0.5673923112552107,
349
+ "grad_norm": 5.130224227905273,
350
+ "learning_rate": 4.507634242580203e-05,
351
+ "loss": 1.177,
352
+ "step": 1225
353
+ },
354
+ {
355
+ "epoch": 0.5789717461787864,
356
+ "grad_norm": 3.587254047393799,
357
+ "learning_rate": 4.486189740950421e-05,
358
+ "loss": 1.2532,
359
+ "step": 1250
360
+ },
361
+ {
362
+ "epoch": 0.5905511811023622,
363
+ "grad_norm": 1.91807222366333,
364
+ "learning_rate": 4.464745239320639e-05,
365
+ "loss": 1.2081,
366
+ "step": 1275
367
+ },
368
+ {
369
+ "epoch": 0.602130616025938,
370
+ "grad_norm": 2.0937275886535645,
371
+ "learning_rate": 4.4433007376908566e-05,
372
+ "loss": 1.247,
373
+ "step": 1300
374
+ },
375
+ {
376
+ "epoch": 0.6137100509495137,
377
+ "grad_norm": 4.973937511444092,
378
+ "learning_rate": 4.4218562360610745e-05,
379
+ "loss": 1.1864,
380
+ "step": 1325
381
+ },
382
+ {
383
+ "epoch": 0.6252894858730894,
384
+ "grad_norm": 4.225080490112305,
385
+ "learning_rate": 4.4004117344312924e-05,
386
+ "loss": 1.3265,
387
+ "step": 1350
388
+ },
389
+ {
390
+ "epoch": 0.6368689207966651,
391
+ "grad_norm": 3.563711166381836,
392
+ "learning_rate": 4.3789672328015096e-05,
393
+ "loss": 1.2219,
394
+ "step": 1375
395
+ },
396
+ {
397
+ "epoch": 0.6484483557202408,
398
+ "grad_norm": 2.596768856048584,
399
+ "learning_rate": 4.3575227311717276e-05,
400
+ "loss": 1.2472,
401
+ "step": 1400
402
+ },
403
+ {
404
+ "epoch": 0.6600277906438166,
405
+ "grad_norm": 2.263674020767212,
406
+ "learning_rate": 4.3360782295419455e-05,
407
+ "loss": 1.1993,
408
+ "step": 1425
409
+ },
410
+ {
411
+ "epoch": 0.6716072255673923,
412
+ "grad_norm": 2.2922544479370117,
413
+ "learning_rate": 4.3146337279121634e-05,
414
+ "loss": 1.1379,
415
+ "step": 1450
416
+ },
417
+ {
418
+ "epoch": 0.683186660490968,
419
+ "grad_norm": 5.394362449645996,
420
+ "learning_rate": 4.293189226282381e-05,
421
+ "loss": 1.1085,
422
+ "step": 1475
423
+ },
424
+ {
425
+ "epoch": 0.6947660954145438,
426
+ "grad_norm": 5.802165508270264,
427
+ "learning_rate": 4.271744724652599e-05,
428
+ "loss": 1.1417,
429
+ "step": 1500
430
+ },
431
+ {
432
+ "epoch": 0.7063455303381195,
433
+ "grad_norm": 3.079671621322632,
434
+ "learning_rate": 4.250300223022817e-05,
435
+ "loss": 1.2111,
436
+ "step": 1525
437
+ },
438
+ {
439
+ "epoch": 0.7179249652616952,
440
+ "grad_norm": 2.836214303970337,
441
+ "learning_rate": 4.228855721393035e-05,
442
+ "loss": 1.2457,
443
+ "step": 1550
444
+ },
445
+ {
446
+ "epoch": 0.729504400185271,
447
+ "grad_norm": 6.700901985168457,
448
+ "learning_rate": 4.207411219763253e-05,
449
+ "loss": 1.1931,
450
+ "step": 1575
451
+ },
452
+ {
453
+ "epoch": 0.7410838351088467,
454
+ "grad_norm": 4.292962551116943,
455
+ "learning_rate": 4.18596671813347e-05,
456
+ "loss": 1.2196,
457
+ "step": 1600
458
+ },
459
+ {
460
+ "epoch": 0.7526632700324224,
461
+ "grad_norm": 2.4369819164276123,
462
+ "learning_rate": 4.164522216503689e-05,
463
+ "loss": 1.1819,
464
+ "step": 1625
465
+ },
466
+ {
467
+ "epoch": 0.7642427049559981,
468
+ "grad_norm": 5.2853474617004395,
469
+ "learning_rate": 4.143077714873907e-05,
470
+ "loss": 1.1874,
471
+ "step": 1650
472
+ },
473
+ {
474
+ "epoch": 0.7758221398795738,
475
+ "grad_norm": 1.9918153285980225,
476
+ "learning_rate": 4.1216332132441246e-05,
477
+ "loss": 1.1866,
478
+ "step": 1675
479
+ },
480
+ {
481
+ "epoch": 0.7874015748031497,
482
+ "grad_norm": 5.2242231369018555,
483
+ "learning_rate": 4.1001887116143425e-05,
484
+ "loss": 1.1775,
485
+ "step": 1700
486
+ },
487
+ {
488
+ "epoch": 0.7989810097267254,
489
+ "grad_norm": 2.511507272720337,
490
+ "learning_rate": 4.0787442099845605e-05,
491
+ "loss": 1.2418,
492
+ "step": 1725
493
+ },
494
+ {
495
+ "epoch": 0.8105604446503011,
496
+ "grad_norm": 2.0094120502471924,
497
+ "learning_rate": 4.0572997083547784e-05,
498
+ "loss": 1.2168,
499
+ "step": 1750
500
+ },
501
+ {
502
+ "epoch": 0.8221398795738768,
503
+ "grad_norm": 2.8366715908050537,
504
+ "learning_rate": 4.035855206724996e-05,
505
+ "loss": 1.217,
506
+ "step": 1775
507
+ },
508
+ {
509
+ "epoch": 0.8337193144974525,
510
+ "grad_norm": 4.902674674987793,
511
+ "learning_rate": 4.014410705095214e-05,
512
+ "loss": 1.1925,
513
+ "step": 1800
514
+ },
515
+ {
516
+ "epoch": 0.8452987494210282,
517
+ "grad_norm": 2.4211857318878174,
518
+ "learning_rate": 3.992966203465432e-05,
519
+ "loss": 1.1716,
520
+ "step": 1825
521
+ },
522
+ {
523
+ "epoch": 0.8568781843446039,
524
+ "grad_norm": 4.9972381591796875,
525
+ "learning_rate": 3.9715217018356493e-05,
526
+ "loss": 1.1871,
527
+ "step": 1850
528
+ },
529
+ {
530
+ "epoch": 0.8684576192681797,
531
+ "grad_norm": 3.486520290374756,
532
+ "learning_rate": 3.950077200205867e-05,
533
+ "loss": 1.2001,
534
+ "step": 1875
535
+ },
536
+ {
537
+ "epoch": 0.8800370541917555,
538
+ "grad_norm": 2.2144150733947754,
539
+ "learning_rate": 3.928632698576085e-05,
540
+ "loss": 1.232,
541
+ "step": 1900
542
+ },
543
+ {
544
+ "epoch": 0.8916164891153312,
545
+ "grad_norm": 6.714953899383545,
546
+ "learning_rate": 3.907188196946303e-05,
547
+ "loss": 1.1767,
548
+ "step": 1925
549
+ },
550
+ {
551
+ "epoch": 0.9031959240389069,
552
+ "grad_norm": 6.166855812072754,
553
+ "learning_rate": 3.885743695316521e-05,
554
+ "loss": 1.1167,
555
+ "step": 1950
556
+ },
557
+ {
558
+ "epoch": 0.9147753589624826,
559
+ "grad_norm": 3.6272430419921875,
560
+ "learning_rate": 3.864299193686739e-05,
561
+ "loss": 1.2003,
562
+ "step": 1975
563
+ },
564
+ {
565
+ "epoch": 0.9263547938860583,
566
+ "grad_norm": 5.192286014556885,
567
+ "learning_rate": 3.842854692056957e-05,
568
+ "loss": 1.2661,
569
+ "step": 2000
570
+ },
571
+ {
572
+ "epoch": 0.9379342288096341,
573
+ "grad_norm": 3.80322003364563,
574
+ "learning_rate": 3.821410190427175e-05,
575
+ "loss": 1.2087,
576
+ "step": 2025
577
+ },
578
+ {
579
+ "epoch": 0.9495136637332098,
580
+ "grad_norm": 2.330951690673828,
581
+ "learning_rate": 3.799965688797393e-05,
582
+ "loss": 1.2075,
583
+ "step": 2050
584
+ },
585
+ {
586
+ "epoch": 0.9610930986567855,
587
+ "grad_norm": 3.1722116470336914,
588
+ "learning_rate": 3.77852118716761e-05,
589
+ "loss": 1.1741,
590
+ "step": 2075
591
+ },
592
+ {
593
+ "epoch": 0.9726725335803613,
594
+ "grad_norm": 3.6307098865509033,
595
+ "learning_rate": 3.757076685537828e-05,
596
+ "loss": 1.1772,
597
+ "step": 2100
598
+ },
599
+ {
600
+ "epoch": 0.984251968503937,
601
+ "grad_norm": 2.525423765182495,
602
+ "learning_rate": 3.735632183908046e-05,
603
+ "loss": 1.1213,
604
+ "step": 2125
605
+ },
606
+ {
607
+ "epoch": 0.9958314034275128,
608
+ "grad_norm": 2.655104160308838,
609
+ "learning_rate": 3.7141876822782637e-05,
610
+ "loss": 1.1762,
611
+ "step": 2150
612
+ },
613
+ {
614
+ "epoch": 1.0,
615
+ "eval_accuracy": 0.39710843373493976,
616
+ "eval_f1_macro": 0.14211797171438428,
617
+ "eval_f1_micro": 0.39710843373493976,
618
+ "eval_f1_weighted": 0.22574498061234247,
619
+ "eval_loss": 1.2174618244171143,
620
+ "eval_precision_macro": 0.09927710843373494,
621
+ "eval_precision_micro": 0.39710843373493976,
622
+ "eval_precision_weighted": 0.15769510814341703,
623
+ "eval_recall_macro": 0.25,
624
+ "eval_recall_micro": 0.39710843373493976,
625
+ "eval_recall_weighted": 0.39710843373493976,
626
+ "eval_runtime": 4.9841,
627
+ "eval_samples_per_second": 416.326,
628
+ "eval_steps_per_second": 26.083,
629
+ "step": 2159
630
+ },
631
+ {
632
+ "epoch": 1.0074108383510885,
633
+ "grad_norm": 2.2906293869018555,
634
+ "learning_rate": 3.6927431806484816e-05,
635
+ "loss": 1.2513,
636
+ "step": 2175
637
+ },
638
+ {
639
+ "epoch": 1.0189902732746643,
640
+ "grad_norm": 4.764555931091309,
641
+ "learning_rate": 3.6712986790187e-05,
642
+ "loss": 1.2133,
643
+ "step": 2200
644
+ },
645
+ {
646
+ "epoch": 1.03056970819824,
647
+ "grad_norm": 2.511648178100586,
648
+ "learning_rate": 3.649854177388918e-05,
649
+ "loss": 1.1227,
650
+ "step": 2225
651
+ },
652
+ {
653
+ "epoch": 1.0421491431218157,
654
+ "grad_norm": 5.154523849487305,
655
+ "learning_rate": 3.628409675759136e-05,
656
+ "loss": 1.1251,
657
+ "step": 2250
658
+ },
659
+ {
660
+ "epoch": 1.0537285780453913,
661
+ "grad_norm": 2.393103837966919,
662
+ "learning_rate": 3.606965174129354e-05,
663
+ "loss": 1.2593,
664
+ "step": 2275
665
+ },
666
+ {
667
+ "epoch": 1.0653080129689672,
668
+ "grad_norm": 2.7954907417297363,
669
+ "learning_rate": 3.585520672499571e-05,
670
+ "loss": 1.2476,
671
+ "step": 2300
672
+ },
673
+ {
674
+ "epoch": 1.0768874478925428,
675
+ "grad_norm": 4.258531093597412,
676
+ "learning_rate": 3.564076170869789e-05,
677
+ "loss": 1.2369,
678
+ "step": 2325
679
+ },
680
+ {
681
+ "epoch": 1.0884668828161186,
682
+ "grad_norm": 4.744079113006592,
683
+ "learning_rate": 3.542631669240007e-05,
684
+ "loss": 1.175,
685
+ "step": 2350
686
+ },
687
+ {
688
+ "epoch": 1.1000463177396944,
689
+ "grad_norm": 4.467709541320801,
690
+ "learning_rate": 3.521187167610225e-05,
691
+ "loss": 1.1174,
692
+ "step": 2375
693
+ },
694
+ {
695
+ "epoch": 1.11162575266327,
696
+ "grad_norm": 2.7314538955688477,
697
+ "learning_rate": 3.499742665980443e-05,
698
+ "loss": 1.2007,
699
+ "step": 2400
700
+ },
701
+ {
702
+ "epoch": 1.1232051875868458,
703
+ "grad_norm": 1.8456259965896606,
704
+ "learning_rate": 3.478298164350661e-05,
705
+ "loss": 1.2232,
706
+ "step": 2425
707
+ },
708
+ {
709
+ "epoch": 1.1347846225104214,
710
+ "grad_norm": 3.8557677268981934,
711
+ "learning_rate": 3.4568536627208786e-05,
712
+ "loss": 1.2338,
713
+ "step": 2450
714
+ },
715
+ {
716
+ "epoch": 1.1463640574339973,
717
+ "grad_norm": 3.338961124420166,
718
+ "learning_rate": 3.4354091610910965e-05,
719
+ "loss": 1.2004,
720
+ "step": 2475
721
+ },
722
+ {
723
+ "epoch": 1.1579434923575729,
724
+ "grad_norm": 2.3821332454681396,
725
+ "learning_rate": 3.4139646594613145e-05,
726
+ "loss": 1.1967,
727
+ "step": 2500
728
+ },
729
+ {
730
+ "epoch": 1.1695229272811487,
731
+ "grad_norm": 2.296182155609131,
732
+ "learning_rate": 3.3925201578315324e-05,
733
+ "loss": 1.1825,
734
+ "step": 2525
735
+ },
736
+ {
737
+ "epoch": 1.1811023622047245,
738
+ "grad_norm": 2.287925958633423,
739
+ "learning_rate": 3.3710756562017496e-05,
740
+ "loss": 1.1661,
741
+ "step": 2550
742
+ },
743
+ {
744
+ "epoch": 1.1926817971283001,
745
+ "grad_norm": 3.0742363929748535,
746
+ "learning_rate": 3.3496311545719675e-05,
747
+ "loss": 1.1855,
748
+ "step": 2575
749
+ },
750
+ {
751
+ "epoch": 1.204261232051876,
752
+ "grad_norm": 2.94059157371521,
753
+ "learning_rate": 3.3281866529421854e-05,
754
+ "loss": 1.186,
755
+ "step": 2600
756
+ },
757
+ {
758
+ "epoch": 1.2158406669754516,
759
+ "grad_norm": 5.658060073852539,
760
+ "learning_rate": 3.3067421513124034e-05,
761
+ "loss": 1.2018,
762
+ "step": 2625
763
+ },
764
+ {
765
+ "epoch": 1.2274201018990274,
766
+ "grad_norm": 4.225418567657471,
767
+ "learning_rate": 3.285297649682621e-05,
768
+ "loss": 1.1913,
769
+ "step": 2650
770
+ },
771
+ {
772
+ "epoch": 1.238999536822603,
773
+ "grad_norm": 3.121039867401123,
774
+ "learning_rate": 3.263853148052839e-05,
775
+ "loss": 1.1655,
776
+ "step": 2675
777
+ },
778
+ {
779
+ "epoch": 1.2505789717461788,
780
+ "grad_norm": 2.750720977783203,
781
+ "learning_rate": 3.242408646423057e-05,
782
+ "loss": 1.1818,
783
+ "step": 2700
784
+ },
785
+ {
786
+ "epoch": 1.2621584066697547,
787
+ "grad_norm": 3.299870491027832,
788
+ "learning_rate": 3.220964144793275e-05,
789
+ "loss": 1.2333,
790
+ "step": 2725
791
+ },
792
+ {
793
+ "epoch": 1.2737378415933303,
794
+ "grad_norm": 1.8936024904251099,
795
+ "learning_rate": 3.1995196431634936e-05,
796
+ "loss": 1.2091,
797
+ "step": 2750
798
+ },
799
+ {
800
+ "epoch": 1.2853172765169059,
801
+ "grad_norm": 4.938189506530762,
802
+ "learning_rate": 3.178075141533711e-05,
803
+ "loss": 1.2436,
804
+ "step": 2775
805
+ },
806
+ {
807
+ "epoch": 1.2968967114404817,
808
+ "grad_norm": 3.0422909259796143,
809
+ "learning_rate": 3.156630639903929e-05,
810
+ "loss": 1.2071,
811
+ "step": 2800
812
+ },
813
+ {
814
+ "epoch": 1.3084761463640575,
815
+ "grad_norm": 3.3571670055389404,
816
+ "learning_rate": 3.135186138274147e-05,
817
+ "loss": 1.1012,
818
+ "step": 2825
819
+ },
820
+ {
821
+ "epoch": 1.3200555812876331,
822
+ "grad_norm": 5.697854518890381,
823
+ "learning_rate": 3.1137416366443646e-05,
824
+ "loss": 1.1837,
825
+ "step": 2850
826
+ },
827
+ {
828
+ "epoch": 1.331635016211209,
829
+ "grad_norm": 2.8652396202087402,
830
+ "learning_rate": 3.0922971350145825e-05,
831
+ "loss": 1.2351,
832
+ "step": 2875
833
+ },
834
+ {
835
+ "epoch": 1.3432144511347845,
836
+ "grad_norm": 2.0512943267822266,
837
+ "learning_rate": 3.0708526333848004e-05,
838
+ "loss": 1.1621,
839
+ "step": 2900
840
+ },
841
+ {
842
+ "epoch": 1.3547938860583604,
843
+ "grad_norm": 3.4354703426361084,
844
+ "learning_rate": 3.0494081317550183e-05,
845
+ "loss": 1.1627,
846
+ "step": 2925
847
+ },
848
+ {
849
+ "epoch": 1.366373320981936,
850
+ "grad_norm": 2.0285403728485107,
851
+ "learning_rate": 3.0279636301252362e-05,
852
+ "loss": 1.2202,
853
+ "step": 2950
854
+ },
855
+ {
856
+ "epoch": 1.3779527559055118,
857
+ "grad_norm": 4.55291223526001,
858
+ "learning_rate": 3.006519128495454e-05,
859
+ "loss": 1.1935,
860
+ "step": 2975
861
+ },
862
+ {
863
+ "epoch": 1.3895321908290876,
864
+ "grad_norm": 3.867063045501709,
865
+ "learning_rate": 2.9850746268656714e-05,
866
+ "loss": 1.0448,
867
+ "step": 3000
868
+ },
869
+ {
870
+ "epoch": 1.4011116257526632,
871
+ "grad_norm": 3.6873021125793457,
872
+ "learning_rate": 2.9636301252358893e-05,
873
+ "loss": 1.2339,
874
+ "step": 3025
875
+ },
876
+ {
877
+ "epoch": 1.412691060676239,
878
+ "grad_norm": 2.2147438526153564,
879
+ "learning_rate": 2.9421856236061072e-05,
880
+ "loss": 1.1809,
881
+ "step": 3050
882
+ },
883
+ {
884
+ "epoch": 1.4242704955998147,
885
+ "grad_norm": 2.6401538848876953,
886
+ "learning_rate": 2.9207411219763255e-05,
887
+ "loss": 1.1291,
888
+ "step": 3075
889
+ },
890
+ {
891
+ "epoch": 1.4358499305233905,
892
+ "grad_norm": 2.2739460468292236,
893
+ "learning_rate": 2.8992966203465434e-05,
894
+ "loss": 1.1953,
895
+ "step": 3100
896
+ },
897
+ {
898
+ "epoch": 1.447429365446966,
899
+ "grad_norm": 1.2269738912582397,
900
+ "learning_rate": 2.8778521187167613e-05,
901
+ "loss": 1.2693,
902
+ "step": 3125
903
+ },
904
+ {
905
+ "epoch": 1.459008800370542,
906
+ "grad_norm": 4.429539680480957,
907
+ "learning_rate": 2.8564076170869792e-05,
908
+ "loss": 1.2717,
909
+ "step": 3150
910
+ },
911
+ {
912
+ "epoch": 1.4705882352941178,
913
+ "grad_norm": 3.294246196746826,
914
+ "learning_rate": 2.834963115457197e-05,
915
+ "loss": 1.1788,
916
+ "step": 3175
917
+ },
918
+ {
919
+ "epoch": 1.4821676702176934,
920
+ "grad_norm": 5.130248546600342,
921
+ "learning_rate": 2.813518613827415e-05,
922
+ "loss": 1.1758,
923
+ "step": 3200
924
+ },
925
+ {
926
+ "epoch": 1.4937471051412692,
927
+ "grad_norm": 2.9172611236572266,
928
+ "learning_rate": 2.792074112197633e-05,
929
+ "loss": 1.1474,
930
+ "step": 3225
931
+ },
932
+ {
933
+ "epoch": 1.5053265400648448,
934
+ "grad_norm": 2.7223055362701416,
935
+ "learning_rate": 2.7706296105678502e-05,
936
+ "loss": 1.2265,
937
+ "step": 3250
938
+ },
939
+ {
940
+ "epoch": 1.5169059749884206,
941
+ "grad_norm": 3.7259788513183594,
942
+ "learning_rate": 2.749185108938068e-05,
943
+ "loss": 1.2508,
944
+ "step": 3275
945
+ },
946
+ {
947
+ "epoch": 1.5284854099119962,
948
+ "grad_norm": 1.7198467254638672,
949
+ "learning_rate": 2.727740607308286e-05,
950
+ "loss": 1.1782,
951
+ "step": 3300
952
+ },
953
+ {
954
+ "epoch": 1.540064844835572,
955
+ "grad_norm": 1.9382416009902954,
956
+ "learning_rate": 2.706296105678504e-05,
957
+ "loss": 1.2074,
958
+ "step": 3325
959
+ },
960
+ {
961
+ "epoch": 1.5516442797591479,
962
+ "grad_norm": 2.2858059406280518,
963
+ "learning_rate": 2.6848516040487222e-05,
964
+ "loss": 1.1473,
965
+ "step": 3350
966
+ },
967
+ {
968
+ "epoch": 1.5632237146827235,
969
+ "grad_norm": 1.461945652961731,
970
+ "learning_rate": 2.66340710241894e-05,
971
+ "loss": 1.1901,
972
+ "step": 3375
973
+ },
974
+ {
975
+ "epoch": 1.574803149606299,
976
+ "grad_norm": 5.549622058868408,
977
+ "learning_rate": 2.641962600789158e-05,
978
+ "loss": 1.1618,
979
+ "step": 3400
980
+ },
981
+ {
982
+ "epoch": 1.586382584529875,
983
+ "grad_norm": 2.907963991165161,
984
+ "learning_rate": 2.620518099159376e-05,
985
+ "loss": 1.1767,
986
+ "step": 3425
987
+ },
988
+ {
989
+ "epoch": 1.5979620194534507,
990
+ "grad_norm": 1.6633723974227905,
991
+ "learning_rate": 2.599073597529594e-05,
992
+ "loss": 1.1997,
993
+ "step": 3450
994
+ },
995
+ {
996
+ "epoch": 1.6095414543770263,
997
+ "grad_norm": 5.5080742835998535,
998
+ "learning_rate": 2.577629095899811e-05,
999
+ "loss": 1.1881,
1000
+ "step": 3475
1001
+ },
1002
+ {
1003
+ "epoch": 1.6211208893006022,
1004
+ "grad_norm": 2.5473108291625977,
1005
+ "learning_rate": 2.556184594270029e-05,
1006
+ "loss": 1.15,
1007
+ "step": 3500
1008
+ },
1009
+ {
1010
+ "epoch": 1.632700324224178,
1011
+ "grad_norm": 2.0483057498931885,
1012
+ "learning_rate": 2.534740092640247e-05,
1013
+ "loss": 1.2271,
1014
+ "step": 3525
1015
+ },
1016
+ {
1017
+ "epoch": 1.6442797591477536,
1018
+ "grad_norm": 3.6027846336364746,
1019
+ "learning_rate": 2.513295591010465e-05,
1020
+ "loss": 1.2121,
1021
+ "step": 3550
1022
+ },
1023
+ {
1024
+ "epoch": 1.6558591940713292,
1025
+ "grad_norm": 3.154784917831421,
1026
+ "learning_rate": 2.4918510893806828e-05,
1027
+ "loss": 1.2706,
1028
+ "step": 3575
1029
+ },
1030
+ {
1031
+ "epoch": 1.667438628994905,
1032
+ "grad_norm": 5.780117511749268,
1033
+ "learning_rate": 2.4704065877509007e-05,
1034
+ "loss": 1.1916,
1035
+ "step": 3600
1036
+ },
1037
+ {
1038
+ "epoch": 1.6790180639184809,
1039
+ "grad_norm": 4.522841930389404,
1040
+ "learning_rate": 2.448962086121119e-05,
1041
+ "loss": 1.1344,
1042
+ "step": 3625
1043
+ },
1044
+ {
1045
+ "epoch": 1.6905974988420565,
1046
+ "grad_norm": 2.302856922149658,
1047
+ "learning_rate": 2.4275175844913365e-05,
1048
+ "loss": 1.1316,
1049
+ "step": 3650
1050
+ },
1051
+ {
1052
+ "epoch": 1.7021769337656323,
1053
+ "grad_norm": 3.6142489910125732,
1054
+ "learning_rate": 2.4060730828615544e-05,
1055
+ "loss": 1.1464,
1056
+ "step": 3675
1057
+ },
1058
+ {
1059
+ "epoch": 1.713756368689208,
1060
+ "grad_norm": 3.417003870010376,
1061
+ "learning_rate": 2.3846285812317723e-05,
1062
+ "loss": 1.2767,
1063
+ "step": 3700
1064
+ },
1065
+ {
1066
+ "epoch": 1.7253358036127837,
1067
+ "grad_norm": 1.8820807933807373,
1068
+ "learning_rate": 2.3631840796019903e-05,
1069
+ "loss": 1.1759,
1070
+ "step": 3725
1071
+ },
1072
+ {
1073
+ "epoch": 1.7369152385363593,
1074
+ "grad_norm": 2.0070981979370117,
1075
+ "learning_rate": 2.341739577972208e-05,
1076
+ "loss": 1.2357,
1077
+ "step": 3750
1078
+ },
1079
+ {
1080
+ "epoch": 1.7484946734599351,
1081
+ "grad_norm": 1.9160246849060059,
1082
+ "learning_rate": 2.3202950763424257e-05,
1083
+ "loss": 1.148,
1084
+ "step": 3775
1085
+ },
1086
+ {
1087
+ "epoch": 1.760074108383511,
1088
+ "grad_norm": 2.4420526027679443,
1089
+ "learning_rate": 2.2988505747126437e-05,
1090
+ "loss": 1.239,
1091
+ "step": 3800
1092
+ },
1093
+ {
1094
+ "epoch": 1.7716535433070866,
1095
+ "grad_norm": 1.5695481300354004,
1096
+ "learning_rate": 2.2774060730828616e-05,
1097
+ "loss": 1.1936,
1098
+ "step": 3825
1099
+ },
1100
+ {
1101
+ "epoch": 1.7832329782306622,
1102
+ "grad_norm": 2.090928077697754,
1103
+ "learning_rate": 2.2559615714530795e-05,
1104
+ "loss": 1.176,
1105
+ "step": 3850
1106
+ },
1107
+ {
1108
+ "epoch": 1.7948124131542382,
1109
+ "grad_norm": 2.7507429122924805,
1110
+ "learning_rate": 2.2345170698232974e-05,
1111
+ "loss": 1.1847,
1112
+ "step": 3875
1113
+ },
1114
+ {
1115
+ "epoch": 1.8063918480778138,
1116
+ "grad_norm": 2.7657129764556885,
1117
+ "learning_rate": 2.2130725681935153e-05,
1118
+ "loss": 1.1522,
1119
+ "step": 3900
1120
+ },
1121
+ {
1122
+ "epoch": 1.8179712830013894,
1123
+ "grad_norm": 4.012863636016846,
1124
+ "learning_rate": 2.1916280665637332e-05,
1125
+ "loss": 1.1398,
1126
+ "step": 3925
1127
+ },
1128
+ {
1129
+ "epoch": 1.8295507179249653,
1130
+ "grad_norm": 2.7316641807556152,
1131
+ "learning_rate": 2.170183564933951e-05,
1132
+ "loss": 1.1766,
1133
+ "step": 3950
1134
+ },
1135
+ {
1136
+ "epoch": 1.841130152848541,
1137
+ "grad_norm": 3.0468456745147705,
1138
+ "learning_rate": 2.148739063304169e-05,
1139
+ "loss": 1.2055,
1140
+ "step": 3975
1141
+ },
1142
+ {
1143
+ "epoch": 1.8527095877721167,
1144
+ "grad_norm": 2.0280911922454834,
1145
+ "learning_rate": 2.1272945616743866e-05,
1146
+ "loss": 1.184,
1147
+ "step": 4000
1148
+ },
1149
+ {
1150
+ "epoch": 1.8642890226956923,
1151
+ "grad_norm": 2.5638182163238525,
1152
+ "learning_rate": 2.1058500600446046e-05,
1153
+ "loss": 1.2357,
1154
+ "step": 4025
1155
+ },
1156
+ {
1157
+ "epoch": 1.8758684576192681,
1158
+ "grad_norm": 2.283189535140991,
1159
+ "learning_rate": 2.0844055584148225e-05,
1160
+ "loss": 1.1874,
1161
+ "step": 4050
1162
+ },
1163
+ {
1164
+ "epoch": 1.887447892542844,
1165
+ "grad_norm": 1.3770339488983154,
1166
+ "learning_rate": 2.0629610567850404e-05,
1167
+ "loss": 1.2674,
1168
+ "step": 4075
1169
+ },
1170
+ {
1171
+ "epoch": 1.8990273274664196,
1172
+ "grad_norm": 1.9555165767669678,
1173
+ "learning_rate": 2.0415165551552583e-05,
1174
+ "loss": 1.1922,
1175
+ "step": 4100
1176
+ },
1177
+ {
1178
+ "epoch": 1.9106067623899954,
1179
+ "grad_norm": 3.256969928741455,
1180
+ "learning_rate": 2.0200720535254762e-05,
1181
+ "loss": 1.1587,
1182
+ "step": 4125
1183
+ },
1184
+ {
1185
+ "epoch": 1.9221861973135712,
1186
+ "grad_norm": 1.853826642036438,
1187
+ "learning_rate": 1.998627551895694e-05,
1188
+ "loss": 1.1769,
1189
+ "step": 4150
1190
+ },
1191
+ {
1192
+ "epoch": 1.9337656322371468,
1193
+ "grad_norm": 2.319624662399292,
1194
+ "learning_rate": 1.977183050265912e-05,
1195
+ "loss": 1.1328,
1196
+ "step": 4175
1197
+ },
1198
+ {
1199
+ "epoch": 1.9453450671607224,
1200
+ "grad_norm": 2.720109701156616,
1201
+ "learning_rate": 1.95573854863613e-05,
1202
+ "loss": 1.1331,
1203
+ "step": 4200
1204
+ },
1205
+ {
1206
+ "epoch": 1.9569245020842982,
1207
+ "grad_norm": 4.079843044281006,
1208
+ "learning_rate": 1.9342940470063475e-05,
1209
+ "loss": 1.1479,
1210
+ "step": 4225
1211
+ },
1212
+ {
1213
+ "epoch": 1.968503937007874,
1214
+ "grad_norm": 3.2058353424072266,
1215
+ "learning_rate": 1.9128495453765654e-05,
1216
+ "loss": 1.1891,
1217
+ "step": 4250
1218
+ },
1219
+ {
1220
+ "epoch": 1.9800833719314497,
1221
+ "grad_norm": 2.1098670959472656,
1222
+ "learning_rate": 1.8914050437467834e-05,
1223
+ "loss": 1.1548,
1224
+ "step": 4275
1225
+ },
1226
+ {
1227
+ "epoch": 1.9916628068550255,
1228
+ "grad_norm": 2.4204399585723877,
1229
+ "learning_rate": 1.8699605421170013e-05,
1230
+ "loss": 1.2393,
1231
+ "step": 4300
1232
+ },
1233
+ {
1234
+ "epoch": 2.0,
1235
+ "eval_accuracy": 0.39710843373493976,
1236
+ "eval_f1_macro": 0.14211797171438428,
1237
+ "eval_f1_micro": 0.39710843373493976,
1238
+ "eval_f1_weighted": 0.22574498061234247,
1239
+ "eval_loss": 1.2068006992340088,
1240
+ "eval_precision_macro": 0.09927710843373494,
1241
+ "eval_precision_micro": 0.39710843373493976,
1242
+ "eval_precision_weighted": 0.15769510814341703,
1243
+ "eval_recall_macro": 0.25,
1244
+ "eval_recall_micro": 0.39710843373493976,
1245
+ "eval_recall_weighted": 0.39710843373493976,
1246
+ "eval_runtime": 5.002,
1247
+ "eval_samples_per_second": 414.833,
1248
+ "eval_steps_per_second": 25.99,
1249
+ "step": 4318
1250
+ },
1251
+ {
1252
+ "epoch": 2.0032422417786013,
1253
+ "grad_norm": 4.668676376342773,
1254
+ "learning_rate": 1.8485160404872192e-05,
1255
+ "loss": 1.2158,
1256
+ "step": 4325
1257
+ },
1258
+ {
1259
+ "epoch": 2.014821676702177,
1260
+ "grad_norm": 2.2886784076690674,
1261
+ "learning_rate": 1.8270715388574368e-05,
1262
+ "loss": 1.1545,
1263
+ "step": 4350
1264
+ },
1265
+ {
1266
+ "epoch": 2.0264011116257525,
1267
+ "grad_norm": 4.7184038162231445,
1268
+ "learning_rate": 1.805627037227655e-05,
1269
+ "loss": 1.2628,
1270
+ "step": 4375
1271
+ },
1272
+ {
1273
+ "epoch": 2.0379805465493286,
1274
+ "grad_norm": 1.9082050323486328,
1275
+ "learning_rate": 1.784182535597873e-05,
1276
+ "loss": 1.1398,
1277
+ "step": 4400
1278
+ },
1279
+ {
1280
+ "epoch": 2.049559981472904,
1281
+ "grad_norm": 2.6623945236206055,
1282
+ "learning_rate": 1.762738033968091e-05,
1283
+ "loss": 1.182,
1284
+ "step": 4425
1285
+ },
1286
+ {
1287
+ "epoch": 2.06113941639648,
1288
+ "grad_norm": 5.472179412841797,
1289
+ "learning_rate": 1.7412935323383088e-05,
1290
+ "loss": 1.2342,
1291
+ "step": 4450
1292
+ },
1293
+ {
1294
+ "epoch": 2.0727188513200554,
1295
+ "grad_norm": 2.2513480186462402,
1296
+ "learning_rate": 1.7198490307085263e-05,
1297
+ "loss": 1.219,
1298
+ "step": 4475
1299
+ },
1300
+ {
1301
+ "epoch": 2.0842982862436314,
1302
+ "grad_norm": 2.979966878890991,
1303
+ "learning_rate": 1.6984045290787443e-05,
1304
+ "loss": 1.2084,
1305
+ "step": 4500
1306
+ },
1307
+ {
1308
+ "epoch": 2.095877721167207,
1309
+ "grad_norm": 4.656105041503906,
1310
+ "learning_rate": 1.6769600274489622e-05,
1311
+ "loss": 1.2289,
1312
+ "step": 4525
1313
+ },
1314
+ {
1315
+ "epoch": 2.1074571560907827,
1316
+ "grad_norm": 8.104940414428711,
1317
+ "learning_rate": 1.65551552581918e-05,
1318
+ "loss": 1.2331,
1319
+ "step": 4550
1320
+ },
1321
+ {
1322
+ "epoch": 2.1190365910143587,
1323
+ "grad_norm": 6.20884370803833,
1324
+ "learning_rate": 1.6340710241893977e-05,
1325
+ "loss": 1.2045,
1326
+ "step": 4575
1327
+ },
1328
+ {
1329
+ "epoch": 2.1306160259379343,
1330
+ "grad_norm": 3.645780086517334,
1331
+ "learning_rate": 1.6126265225596156e-05,
1332
+ "loss": 1.1754,
1333
+ "step": 4600
1334
+ },
1335
+ {
1336
+ "epoch": 2.14219546086151,
1337
+ "grad_norm": 2.960564136505127,
1338
+ "learning_rate": 1.5911820209298335e-05,
1339
+ "loss": 1.195,
1340
+ "step": 4625
1341
+ },
1342
+ {
1343
+ "epoch": 2.1537748957850855,
1344
+ "grad_norm": 2.6137890815734863,
1345
+ "learning_rate": 1.5697375193000517e-05,
1346
+ "loss": 1.099,
1347
+ "step": 4650
1348
+ },
1349
+ {
1350
+ "epoch": 2.1653543307086616,
1351
+ "grad_norm": 4.492011547088623,
1352
+ "learning_rate": 1.5482930176702697e-05,
1353
+ "loss": 1.2088,
1354
+ "step": 4675
1355
+ },
1356
+ {
1357
+ "epoch": 2.176933765632237,
1358
+ "grad_norm": 3.653522253036499,
1359
+ "learning_rate": 1.5268485160404872e-05,
1360
+ "loss": 1.2309,
1361
+ "step": 4700
1362
+ },
1363
+ {
1364
+ "epoch": 2.1885132005558128,
1365
+ "grad_norm": 2.6041195392608643,
1366
+ "learning_rate": 1.5054040144107052e-05,
1367
+ "loss": 1.2099,
1368
+ "step": 4725
1369
+ },
1370
+ {
1371
+ "epoch": 2.200092635479389,
1372
+ "grad_norm": 5.426154613494873,
1373
+ "learning_rate": 1.483959512780923e-05,
1374
+ "loss": 1.203,
1375
+ "step": 4750
1376
+ },
1377
+ {
1378
+ "epoch": 2.2116720704029644,
1379
+ "grad_norm": 2.009709358215332,
1380
+ "learning_rate": 1.462515011151141e-05,
1381
+ "loss": 1.1193,
1382
+ "step": 4775
1383
+ },
1384
+ {
1385
+ "epoch": 2.22325150532654,
1386
+ "grad_norm": 2.586091995239258,
1387
+ "learning_rate": 1.4410705095213589e-05,
1388
+ "loss": 1.1109,
1389
+ "step": 4800
1390
+ },
1391
+ {
1392
+ "epoch": 2.2348309402501156,
1393
+ "grad_norm": 1.9446204900741577,
1394
+ "learning_rate": 1.4196260078915766e-05,
1395
+ "loss": 1.2251,
1396
+ "step": 4825
1397
+ },
1398
+ {
1399
+ "epoch": 2.2464103751736917,
1400
+ "grad_norm": 2.2268826961517334,
1401
+ "learning_rate": 1.3981815062617946e-05,
1402
+ "loss": 1.1447,
1403
+ "step": 4850
1404
+ },
1405
+ {
1406
+ "epoch": 2.2579898100972673,
1407
+ "grad_norm": 4.135994911193848,
1408
+ "learning_rate": 1.3767370046320125e-05,
1409
+ "loss": 1.1326,
1410
+ "step": 4875
1411
+ },
1412
+ {
1413
+ "epoch": 2.269569245020843,
1414
+ "grad_norm": 1.3713667392730713,
1415
+ "learning_rate": 1.3552925030022304e-05,
1416
+ "loss": 1.2048,
1417
+ "step": 4900
1418
+ },
1419
+ {
1420
+ "epoch": 2.281148679944419,
1421
+ "grad_norm": 4.801929473876953,
1422
+ "learning_rate": 1.333848001372448e-05,
1423
+ "loss": 1.1726,
1424
+ "step": 4925
1425
+ },
1426
+ {
1427
+ "epoch": 2.2927281148679945,
1428
+ "grad_norm": 1.5106154680252075,
1429
+ "learning_rate": 1.312403499742666e-05,
1430
+ "loss": 1.2191,
1431
+ "step": 4950
1432
+ },
1433
+ {
1434
+ "epoch": 2.30430754979157,
1435
+ "grad_norm": 1.9938125610351562,
1436
+ "learning_rate": 1.290958998112884e-05,
1437
+ "loss": 1.181,
1438
+ "step": 4975
1439
+ },
1440
+ {
1441
+ "epoch": 2.3158869847151458,
1442
+ "grad_norm": 5.004785060882568,
1443
+ "learning_rate": 1.2695144964831019e-05,
1444
+ "loss": 1.1746,
1445
+ "step": 5000
1446
+ },
1447
+ {
1448
+ "epoch": 2.327466419638722,
1449
+ "grad_norm": 2.9320216178894043,
1450
+ "learning_rate": 1.2480699948533196e-05,
1451
+ "loss": 1.2399,
1452
+ "step": 5025
1453
+ },
1454
+ {
1455
+ "epoch": 2.3390458545622974,
1456
+ "grad_norm": 4.154562473297119,
1457
+ "learning_rate": 1.2266254932235375e-05,
1458
+ "loss": 1.2275,
1459
+ "step": 5050
1460
+ },
1461
+ {
1462
+ "epoch": 2.350625289485873,
1463
+ "grad_norm": 2.5340206623077393,
1464
+ "learning_rate": 1.2051809915937553e-05,
1465
+ "loss": 1.1873,
1466
+ "step": 5075
1467
+ },
1468
+ {
1469
+ "epoch": 2.362204724409449,
1470
+ "grad_norm": 2.7467281818389893,
1471
+ "learning_rate": 1.1837364899639734e-05,
1472
+ "loss": 1.2125,
1473
+ "step": 5100
1474
+ },
1475
+ {
1476
+ "epoch": 2.3737841593330247,
1477
+ "grad_norm": 2.1378886699676514,
1478
+ "learning_rate": 1.1622919883341913e-05,
1479
+ "loss": 1.1484,
1480
+ "step": 5125
1481
+ },
1482
+ {
1483
+ "epoch": 2.3853635942566003,
1484
+ "grad_norm": 1.7250367403030396,
1485
+ "learning_rate": 1.140847486704409e-05,
1486
+ "loss": 1.1293,
1487
+ "step": 5150
1488
+ },
1489
+ {
1490
+ "epoch": 2.396943029180176,
1491
+ "grad_norm": 4.303859233856201,
1492
+ "learning_rate": 1.119402985074627e-05,
1493
+ "loss": 1.2065,
1494
+ "step": 5175
1495
+ },
1496
+ {
1497
+ "epoch": 2.408522464103752,
1498
+ "grad_norm": 4.186789035797119,
1499
+ "learning_rate": 1.0979584834448447e-05,
1500
+ "loss": 1.1573,
1501
+ "step": 5200
1502
+ },
1503
+ {
1504
+ "epoch": 2.4201018990273275,
1505
+ "grad_norm": 2.763376474380493,
1506
+ "learning_rate": 1.0765139818150626e-05,
1507
+ "loss": 1.2093,
1508
+ "step": 5225
1509
+ },
1510
+ {
1511
+ "epoch": 2.431681333950903,
1512
+ "grad_norm": 4.167290210723877,
1513
+ "learning_rate": 1.0550694801852805e-05,
1514
+ "loss": 1.2073,
1515
+ "step": 5250
1516
+ },
1517
+ {
1518
+ "epoch": 2.4432607688744787,
1519
+ "grad_norm": 4.77427864074707,
1520
+ "learning_rate": 1.0336249785554984e-05,
1521
+ "loss": 1.2074,
1522
+ "step": 5275
1523
+ },
1524
+ {
1525
+ "epoch": 2.454840203798055,
1526
+ "grad_norm": 2.2546989917755127,
1527
+ "learning_rate": 1.0121804769257163e-05,
1528
+ "loss": 1.2089,
1529
+ "step": 5300
1530
+ },
1531
+ {
1532
+ "epoch": 2.4664196387216304,
1533
+ "grad_norm": 2.603929281234741,
1534
+ "learning_rate": 9.907359752959341e-06,
1535
+ "loss": 1.1774,
1536
+ "step": 5325
1537
+ },
1538
+ {
1539
+ "epoch": 2.477999073645206,
1540
+ "grad_norm": 2.2647411823272705,
1541
+ "learning_rate": 9.69291473666152e-06,
1542
+ "loss": 1.1509,
1543
+ "step": 5350
1544
+ },
1545
+ {
1546
+ "epoch": 2.4895785085687816,
1547
+ "grad_norm": 2.784689426422119,
1548
+ "learning_rate": 9.4784697203637e-06,
1549
+ "loss": 1.1545,
1550
+ "step": 5375
1551
+ },
1552
+ {
1553
+ "epoch": 2.5011579434923576,
1554
+ "grad_norm": 3.1189873218536377,
1555
+ "learning_rate": 9.264024704065878e-06,
1556
+ "loss": 1.1353,
1557
+ "step": 5400
1558
+ },
1559
+ {
1560
+ "epoch": 2.5127373784159333,
1561
+ "grad_norm": 3.5311825275421143,
1562
+ "learning_rate": 9.049579687768056e-06,
1563
+ "loss": 1.2599,
1564
+ "step": 5425
1565
+ },
1566
+ {
1567
+ "epoch": 2.5243168133395093,
1568
+ "grad_norm": 1.4731173515319824,
1569
+ "learning_rate": 8.835134671470235e-06,
1570
+ "loss": 1.127,
1571
+ "step": 5450
1572
+ },
1573
+ {
1574
+ "epoch": 2.535896248263085,
1575
+ "grad_norm": 2.797048807144165,
1576
+ "learning_rate": 8.620689655172414e-06,
1577
+ "loss": 1.0877,
1578
+ "step": 5475
1579
+ },
1580
+ {
1581
+ "epoch": 2.5474756831866605,
1582
+ "grad_norm": 3.5394978523254395,
1583
+ "learning_rate": 8.406244638874593e-06,
1584
+ "loss": 1.1834,
1585
+ "step": 5500
1586
+ },
1587
+ {
1588
+ "epoch": 2.559055118110236,
1589
+ "grad_norm": 4.206399440765381,
1590
+ "learning_rate": 8.191799622576772e-06,
1591
+ "loss": 1.1239,
1592
+ "step": 5525
1593
+ },
1594
+ {
1595
+ "epoch": 2.5706345530338117,
1596
+ "grad_norm": 2.8601016998291016,
1597
+ "learning_rate": 7.97735460627895e-06,
1598
+ "loss": 1.217,
1599
+ "step": 5550
1600
+ },
1601
+ {
1602
+ "epoch": 2.5822139879573878,
1603
+ "grad_norm": 3.3993771076202393,
1604
+ "learning_rate": 7.762909589981129e-06,
1605
+ "loss": 1.1876,
1606
+ "step": 5575
1607
+ },
1608
+ {
1609
+ "epoch": 2.5937934228809634,
1610
+ "grad_norm": 2.0492095947265625,
1611
+ "learning_rate": 7.548464573683307e-06,
1612
+ "loss": 1.1923,
1613
+ "step": 5600
1614
+ },
1615
+ {
1616
+ "epoch": 2.605372857804539,
1617
+ "grad_norm": 3.045842170715332,
1618
+ "learning_rate": 7.3340195573854865e-06,
1619
+ "loss": 1.2492,
1620
+ "step": 5625
1621
+ },
1622
+ {
1623
+ "epoch": 2.616952292728115,
1624
+ "grad_norm": 1.7433972358703613,
1625
+ "learning_rate": 7.119574541087666e-06,
1626
+ "loss": 1.2652,
1627
+ "step": 5650
1628
+ },
1629
+ {
1630
+ "epoch": 2.6285317276516906,
1631
+ "grad_norm": 2.3767240047454834,
1632
+ "learning_rate": 6.905129524789844e-06,
1633
+ "loss": 1.2284,
1634
+ "step": 5675
1635
+ },
1636
+ {
1637
+ "epoch": 2.6401111625752662,
1638
+ "grad_norm": 4.228554725646973,
1639
+ "learning_rate": 6.690684508492023e-06,
1640
+ "loss": 1.1705,
1641
+ "step": 5700
1642
+ },
1643
+ {
1644
+ "epoch": 2.651690597498842,
1645
+ "grad_norm": 4.027316570281982,
1646
+ "learning_rate": 6.476239492194201e-06,
1647
+ "loss": 1.1751,
1648
+ "step": 5725
1649
+ },
1650
+ {
1651
+ "epoch": 2.663270032422418,
1652
+ "grad_norm": 2.5308732986450195,
1653
+ "learning_rate": 6.2617944758963805e-06,
1654
+ "loss": 1.2038,
1655
+ "step": 5750
1656
+ },
1657
+ {
1658
+ "epoch": 2.6748494673459935,
1659
+ "grad_norm": 2.849998712539673,
1660
+ "learning_rate": 6.04734945959856e-06,
1661
+ "loss": 1.13,
1662
+ "step": 5775
1663
+ },
1664
+ {
1665
+ "epoch": 2.686428902269569,
1666
+ "grad_norm": 1.7784459590911865,
1667
+ "learning_rate": 5.832904443300738e-06,
1668
+ "loss": 1.2206,
1669
+ "step": 5800
1670
+ },
1671
+ {
1672
+ "epoch": 2.698008337193145,
1673
+ "grad_norm": 5.856213569641113,
1674
+ "learning_rate": 5.618459427002916e-06,
1675
+ "loss": 1.1501,
1676
+ "step": 5825
1677
+ },
1678
+ {
1679
+ "epoch": 2.7095877721167207,
1680
+ "grad_norm": 2.401578664779663,
1681
+ "learning_rate": 5.4040144107050954e-06,
1682
+ "loss": 1.2501,
1683
+ "step": 5850
1684
+ },
1685
+ {
1686
+ "epoch": 2.7211672070402964,
1687
+ "grad_norm": 2.7738897800445557,
1688
+ "learning_rate": 5.189569394407274e-06,
1689
+ "loss": 1.1535,
1690
+ "step": 5875
1691
+ },
1692
+ {
1693
+ "epoch": 2.732746641963872,
1694
+ "grad_norm": 7.125967979431152,
1695
+ "learning_rate": 4.975124378109453e-06,
1696
+ "loss": 1.2025,
1697
+ "step": 5900
1698
+ },
1699
+ {
1700
+ "epoch": 2.744326076887448,
1701
+ "grad_norm": 1.4013216495513916,
1702
+ "learning_rate": 4.760679361811632e-06,
1703
+ "loss": 1.2028,
1704
+ "step": 5925
1705
+ },
1706
+ {
1707
+ "epoch": 2.7559055118110236,
1708
+ "grad_norm": 6.033567905426025,
1709
+ "learning_rate": 4.54623434551381e-06,
1710
+ "loss": 1.1888,
1711
+ "step": 5950
1712
+ },
1713
+ {
1714
+ "epoch": 2.767484946734599,
1715
+ "grad_norm": 2.6083405017852783,
1716
+ "learning_rate": 4.3317893292159895e-06,
1717
+ "loss": 1.1546,
1718
+ "step": 5975
1719
+ },
1720
+ {
1721
+ "epoch": 2.7790643816581753,
1722
+ "grad_norm": 3.179180860519409,
1723
+ "learning_rate": 4.117344312918168e-06,
1724
+ "loss": 1.2136,
1725
+ "step": 6000
1726
+ },
1727
+ {
1728
+ "epoch": 2.790643816581751,
1729
+ "grad_norm": 1.8782720565795898,
1730
+ "learning_rate": 3.902899296620346e-06,
1731
+ "loss": 1.116,
1732
+ "step": 6025
1733
+ },
1734
+ {
1735
+ "epoch": 2.8022232515053265,
1736
+ "grad_norm": 1.5270411968231201,
1737
+ "learning_rate": 3.6884542803225257e-06,
1738
+ "loss": 1.1756,
1739
+ "step": 6050
1740
+ },
1741
+ {
1742
+ "epoch": 2.813802686428902,
1743
+ "grad_norm": 2.8853325843811035,
1744
+ "learning_rate": 3.4740092640247044e-06,
1745
+ "loss": 1.1418,
1746
+ "step": 6075
1747
+ },
1748
+ {
1749
+ "epoch": 2.825382121352478,
1750
+ "grad_norm": 4.495181560516357,
1751
+ "learning_rate": 3.259564247726883e-06,
1752
+ "loss": 1.2573,
1753
+ "step": 6100
1754
+ },
1755
+ {
1756
+ "epoch": 2.8369615562760537,
1757
+ "grad_norm": 3.329115390777588,
1758
+ "learning_rate": 3.045119231429062e-06,
1759
+ "loss": 1.1984,
1760
+ "step": 6125
1761
+ },
1762
+ {
1763
+ "epoch": 2.8485409911996293,
1764
+ "grad_norm": 2.650596857070923,
1765
+ "learning_rate": 2.8306742151312406e-06,
1766
+ "loss": 1.1734,
1767
+ "step": 6150
1768
+ },
1769
+ {
1770
+ "epoch": 2.8601204261232054,
1771
+ "grad_norm": 2.172297239303589,
1772
+ "learning_rate": 2.616229198833419e-06,
1773
+ "loss": 1.1968,
1774
+ "step": 6175
1775
+ },
1776
+ {
1777
+ "epoch": 2.871699861046781,
1778
+ "grad_norm": 2.388245105743408,
1779
+ "learning_rate": 2.401784182535598e-06,
1780
+ "loss": 1.1787,
1781
+ "step": 6200
1782
+ },
1783
+ {
1784
+ "epoch": 2.8832792959703566,
1785
+ "grad_norm": 2.751389265060425,
1786
+ "learning_rate": 2.1873391662377767e-06,
1787
+ "loss": 1.1884,
1788
+ "step": 6225
1789
+ },
1790
+ {
1791
+ "epoch": 2.894858730893932,
1792
+ "grad_norm": 3.0782809257507324,
1793
+ "learning_rate": 1.9728941499399555e-06,
1794
+ "loss": 1.1554,
1795
+ "step": 6250
1796
+ },
1797
+ {
1798
+ "epoch": 2.9064381658175082,
1799
+ "grad_norm": 2.73215913772583,
1800
+ "learning_rate": 1.7584491336421344e-06,
1801
+ "loss": 1.2045,
1802
+ "step": 6275
1803
+ },
1804
+ {
1805
+ "epoch": 2.918017600741084,
1806
+ "grad_norm": 2.4149420261383057,
1807
+ "learning_rate": 1.544004117344313e-06,
1808
+ "loss": 1.1549,
1809
+ "step": 6300
1810
+ },
1811
+ {
1812
+ "epoch": 2.9295970356646595,
1813
+ "grad_norm": 2.5072360038757324,
1814
+ "learning_rate": 1.3295591010464916e-06,
1815
+ "loss": 1.1785,
1816
+ "step": 6325
1817
+ },
1818
+ {
1819
+ "epoch": 2.9411764705882355,
1820
+ "grad_norm": 3.0238680839538574,
1821
+ "learning_rate": 1.1151140847486706e-06,
1822
+ "loss": 1.1882,
1823
+ "step": 6350
1824
+ },
1825
+ {
1826
+ "epoch": 2.952755905511811,
1827
+ "grad_norm": 1.7855486869812012,
1828
+ "learning_rate": 9.006690684508493e-07,
1829
+ "loss": 1.1809,
1830
+ "step": 6375
1831
+ },
1832
+ {
1833
+ "epoch": 2.9643353404353867,
1834
+ "grad_norm": 5.080151557922363,
1835
+ "learning_rate": 6.862240521530279e-07,
1836
+ "loss": 1.2288,
1837
+ "step": 6400
1838
+ },
1839
+ {
1840
+ "epoch": 2.9759147753589623,
1841
+ "grad_norm": 2.6682169437408447,
1842
+ "learning_rate": 4.717790358552067e-07,
1843
+ "loss": 1.2158,
1844
+ "step": 6425
1845
+ },
1846
+ {
1847
+ "epoch": 2.9874942102825384,
1848
+ "grad_norm": 1.6711657047271729,
1849
+ "learning_rate": 2.573340195573855e-07,
1850
+ "loss": 1.1705,
1851
+ "step": 6450
1852
+ },
1853
+ {
1854
+ "epoch": 2.999073645206114,
1855
+ "grad_norm": 3.766089916229248,
1856
+ "learning_rate": 4.2889003259564246e-08,
1857
+ "loss": 1.1834,
1858
+ "step": 6475
1859
+ },
1860
+ {
1861
+ "epoch": 3.0,
1862
+ "eval_accuracy": 0.39710843373493976,
1863
+ "eval_f1_macro": 0.14211797171438428,
1864
+ "eval_f1_micro": 0.39710843373493976,
1865
+ "eval_f1_weighted": 0.22574498061234247,
1866
+ "eval_loss": 1.2045246362686157,
1867
+ "eval_precision_macro": 0.09927710843373494,
1868
+ "eval_precision_micro": 0.39710843373493976,
1869
+ "eval_precision_weighted": 0.15769510814341703,
1870
+ "eval_recall_macro": 0.25,
1871
+ "eval_recall_micro": 0.39710843373493976,
1872
+ "eval_recall_weighted": 0.39710843373493976,
1873
+ "eval_runtime": 4.9488,
1874
+ "eval_samples_per_second": 419.296,
1875
+ "eval_steps_per_second": 26.269,
1876
+ "step": 6477
1877
+ }
1878
+ ],
1879
+ "logging_steps": 25,
1880
+ "max_steps": 6477,
1881
+ "num_input_tokens_seen": 0,
1882
+ "num_train_epochs": 3,
1883
+ "save_steps": 500,
1884
+ "stateful_callbacks": {
1885
+ "EarlyStoppingCallback": {
1886
+ "args": {
1887
+ "early_stopping_patience": 5,
1888
+ "early_stopping_threshold": 0.01
1889
+ },
1890
+ "attributes": {
1891
+ "early_stopping_patience_counter": 0
1892
+ }
1893
+ },
1894
+ "TrainerControl": {
1895
+ "args": {
1896
+ "should_epoch_stop": false,
1897
+ "should_evaluate": false,
1898
+ "should_log": false,
1899
+ "should_save": true,
1900
+ "should_training_stop": true
1901
+ },
1902
+ "attributes": {}
1903
+ }
1904
+ },
1905
+ "total_flos": 3407612468023296.0,
1906
+ "train_batch_size": 8,
1907
+ "trial_name": null,
1908
+ "trial_params": null
1909
+ }
checkpoint-6477/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06a543cf5bd52dcc2d1f0e6733649fac43cdf55d22872e60b4567f3a8e4671e9
3
+ size 5240
config.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "FacebookAI/roberta-base",
3
+ "_num_labels": 4,
4
+ "architectures": [
5
+ "RobertaForSequenceClassification"
6
+ ],
7
+ "attention_probs_dropout_prob": 0.1,
8
+ "bos_token_id": 0,
9
+ "classifier_dropout": null,
10
+ "eos_token_id": 2,
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.1,
13
+ "hidden_size": 768,
14
+ "id2label": {
15
+ "0": "gb",
16
+ "1": "gc",
17
+ "2": "gf",
18
+ "3": "u1"
19
+ },
20
+ "initializer_range": 0.02,
21
+ "intermediate_size": 3072,
22
+ "label2id": {
23
+ "gb": 0,
24
+ "gc": 1,
25
+ "gf": 2,
26
+ "u1": 3
27
+ },
28
+ "layer_norm_eps": 1e-05,
29
+ "max_position_embeddings": 514,
30
+ "model_type": "roberta",
31
+ "num_attention_heads": 12,
32
+ "num_hidden_layers": 12,
33
+ "pad_token_id": 1,
34
+ "position_embedding_type": "absolute",
35
+ "problem_type": "single_label_classification",
36
+ "torch_dtype": "float32",
37
+ "transformers_version": "4.44.1",
38
+ "type_vocab_size": 1,
39
+ "use_cache": true,
40
+ "vocab_size": 50265
41
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73a17fb75bfe03e6850ef726c0196e43a21f4930183cfc6d82d383c0ad389b13
3
+ size 498618976
runs/Aug26_07-01-12_r-riken01-flan-t5-base-c75wamlq-d893c-y5aep/events.out.tfevents.1724655673.r-riken01-flan-t5-base-c75wamlq-d893c-y5aep.111.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9c93bb8f585f4c03e976ce2fba0f4a7bf40ef076e0504c98eee61b66592ac10e
3
- size 54716
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a1a63056cbd133d4c3dd6bcda3a803b32deae3354766c5ebf194c2e26b29f5a
3
+ size 62655
runs/Aug26_07-01-12_r-riken01-flan-t5-base-c75wamlq-d893c-y5aep/events.out.tfevents.1724656380.r-riken01-flan-t5-base-c75wamlq-d893c-y5aep.111.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66e9e22e6283b45e5cfc2cb7226aa1b371579820929eabaadbeb06edbe9db34c
3
+ size 921
special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "cls_token": "<s>",
4
+ "eos_token": "</s>",
5
+ "mask_token": {
6
+ "content": "<mask>",
7
+ "lstrip": true,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "pad_token": "<pad>",
13
+ "sep_token": "</s>",
14
+ "unk_token": "<unk>"
15
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": "<s>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "1": {
13
+ "content": "<pad>",
14
+ "lstrip": false,
15
+ "normalized": true,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "2": {
21
+ "content": "</s>",
22
+ "lstrip": false,
23
+ "normalized": true,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "3": {
29
+ "content": "<unk>",
30
+ "lstrip": false,
31
+ "normalized": true,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "50264": {
37
+ "content": "<mask>",
38
+ "lstrip": true,
39
+ "normalized": false,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ }
44
+ },
45
+ "bos_token": "<s>",
46
+ "clean_up_tokenization_spaces": true,
47
+ "cls_token": "<s>",
48
+ "eos_token": "</s>",
49
+ "errors": "replace",
50
+ "mask_token": "<mask>",
51
+ "model_max_length": 512,
52
+ "pad_token": "<pad>",
53
+ "sep_token": "</s>",
54
+ "tokenizer_class": "RobertaTokenizer",
55
+ "trim_offsets": true,
56
+ "unk_token": "<unk>"
57
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06a543cf5bd52dcc2d1f0e6733649fac43cdf55d22872e60b4567f3a8e4671e9
3
+ size 5240
training_params.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "data_path": "TrustPilot-balanced-location-roberta/autotrain-data",
3
+ "model": "FacebookAI/roberta-base",
4
+ "lr": 5e-05,
5
+ "epochs": 3,
6
+ "max_seq_length": 128,
7
+ "batch_size": 8,
8
+ "warmup_ratio": 0.1,
9
+ "gradient_accumulation": 1,
10
+ "optimizer": "adamw_torch",
11
+ "scheduler": "linear",
12
+ "weight_decay": 0.0,
13
+ "max_grad_norm": 1.0,
14
+ "seed": 42,
15
+ "train_split": "train",
16
+ "valid_split": "validation",
17
+ "text_column": "autotrain_text",
18
+ "target_column": "autotrain_label",
19
+ "logging_steps": -1,
20
+ "project_name": "TrustPilot-balanced-location-roberta",
21
+ "auto_find_batch_size": false,
22
+ "mixed_precision": "fp16",
23
+ "save_total_limit": 1,
24
+ "push_to_hub": true,
25
+ "eval_strategy": "epoch",
26
+ "username": "riken01",
27
+ "log": "tensorboard",
28
+ "early_stopping_patience": 5,
29
+ "early_stopping_threshold": 0.01
30
+ }
vocab.json ADDED
The diff for this file is too large to render. See raw diff