BTX24 commited on
Commit
0e42238
1 Parent(s): 351bcd0

End of training

Browse files
all_results.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 10.0,
3
+ "eval_accuracy": 0.6991409248766222,
4
+ "eval_f1": 0.6766571542539821,
5
+ "eval_loss": 0.7419390082359314,
6
+ "eval_precision": 0.6830062110035815,
7
+ "eval_recall": 0.6991409248766222,
8
+ "eval_runtime": 80.0564,
9
+ "eval_samples_per_second": 136.679,
10
+ "eval_steps_per_second": 2.136,
11
+ "total_flos": 3.3914202248568177e+19,
12
+ "train_loss": 0.790359598014787,
13
+ "train_runtime": 7828.8109,
14
+ "train_samples_per_second": 55.901,
15
+ "train_steps_per_second": 0.218
16
+ }
eval_results.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 10.0,
3
+ "eval_accuracy": 0.6991409248766222,
4
+ "eval_f1": 0.6766571542539821,
5
+ "eval_loss": 0.7419390082359314,
6
+ "eval_precision": 0.6830062110035815,
7
+ "eval_recall": 0.6991409248766222,
8
+ "eval_runtime": 80.0564,
9
+ "eval_samples_per_second": 136.679,
10
+ "eval_steps_per_second": 2.136
11
+ }
runs/Aug31_14-32-13_bad3846a2b09/events.out.tfevents.1725122684.bad3846a2b09.1416.4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0da12348d4574c75322deaf8b4d24e9e33f0f9a22759a7366d25f7b52f7a05b2
3
+ size 560
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 10.0,
3
+ "total_flos": 3.3914202248568177e+19,
4
+ "train_loss": 0.790359598014787,
5
+ "train_runtime": 7828.8109,
6
+ "train_samples_per_second": 55.901,
7
+ "train_steps_per_second": 0.218
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,1359 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.6991409248766222,
3
+ "best_model_checkpoint": "vit-base-patch16-224-in21k-finetuned-hongrui_mammogram_v_1/checkpoint-1710",
4
+ "epoch": 10.0,
5
+ "eval_steps": 500,
6
+ "global_step": 1710,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.05847953216374269,
13
+ "grad_norm": 1.5846091508865356,
14
+ "learning_rate": 2.9239766081871343e-06,
15
+ "loss": 1.3844,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 0.11695906432748537,
20
+ "grad_norm": 1.26529061794281,
21
+ "learning_rate": 5.8479532163742686e-06,
22
+ "loss": 1.3401,
23
+ "step": 20
24
+ },
25
+ {
26
+ "epoch": 0.17543859649122806,
27
+ "grad_norm": 1.0668294429779053,
28
+ "learning_rate": 8.771929824561403e-06,
29
+ "loss": 1.2478,
30
+ "step": 30
31
+ },
32
+ {
33
+ "epoch": 0.23391812865497075,
34
+ "grad_norm": 0.7071924805641174,
35
+ "learning_rate": 1.1695906432748537e-05,
36
+ "loss": 1.1693,
37
+ "step": 40
38
+ },
39
+ {
40
+ "epoch": 0.29239766081871343,
41
+ "grad_norm": 0.7040536403656006,
42
+ "learning_rate": 1.4619883040935673e-05,
43
+ "loss": 1.0874,
44
+ "step": 50
45
+ },
46
+ {
47
+ "epoch": 0.3508771929824561,
48
+ "grad_norm": 0.5056448578834534,
49
+ "learning_rate": 1.7543859649122806e-05,
50
+ "loss": 1.0551,
51
+ "step": 60
52
+ },
53
+ {
54
+ "epoch": 0.4093567251461988,
55
+ "grad_norm": 0.6976526975631714,
56
+ "learning_rate": 2.046783625730994e-05,
57
+ "loss": 1.0349,
58
+ "step": 70
59
+ },
60
+ {
61
+ "epoch": 0.4678362573099415,
62
+ "grad_norm": 0.6909885406494141,
63
+ "learning_rate": 2.3391812865497074e-05,
64
+ "loss": 0.9862,
65
+ "step": 80
66
+ },
67
+ {
68
+ "epoch": 0.5263157894736842,
69
+ "grad_norm": 1.2905045747756958,
70
+ "learning_rate": 2.6315789473684212e-05,
71
+ "loss": 0.9897,
72
+ "step": 90
73
+ },
74
+ {
75
+ "epoch": 0.5847953216374269,
76
+ "grad_norm": 1.0667847394943237,
77
+ "learning_rate": 2.9239766081871346e-05,
78
+ "loss": 0.9316,
79
+ "step": 100
80
+ },
81
+ {
82
+ "epoch": 0.6432748538011696,
83
+ "grad_norm": 0.7333235144615173,
84
+ "learning_rate": 3.216374269005848e-05,
85
+ "loss": 0.9224,
86
+ "step": 110
87
+ },
88
+ {
89
+ "epoch": 0.7017543859649122,
90
+ "grad_norm": 0.8830112814903259,
91
+ "learning_rate": 3.508771929824561e-05,
92
+ "loss": 0.9127,
93
+ "step": 120
94
+ },
95
+ {
96
+ "epoch": 0.7602339181286549,
97
+ "grad_norm": 0.7408013343811035,
98
+ "learning_rate": 3.8011695906432746e-05,
99
+ "loss": 0.9349,
100
+ "step": 130
101
+ },
102
+ {
103
+ "epoch": 0.8187134502923976,
104
+ "grad_norm": 0.6464580297470093,
105
+ "learning_rate": 4.093567251461988e-05,
106
+ "loss": 0.9046,
107
+ "step": 140
108
+ },
109
+ {
110
+ "epoch": 0.8771929824561403,
111
+ "grad_norm": 0.9568632245063782,
112
+ "learning_rate": 4.3859649122807014e-05,
113
+ "loss": 0.8981,
114
+ "step": 150
115
+ },
116
+ {
117
+ "epoch": 0.935672514619883,
118
+ "grad_norm": 0.755339503288269,
119
+ "learning_rate": 4.678362573099415e-05,
120
+ "loss": 0.9012,
121
+ "step": 160
122
+ },
123
+ {
124
+ "epoch": 0.9941520467836257,
125
+ "grad_norm": 0.8921021819114685,
126
+ "learning_rate": 4.970760233918128e-05,
127
+ "loss": 0.8576,
128
+ "step": 170
129
+ },
130
+ {
131
+ "epoch": 1.0,
132
+ "eval_accuracy": 0.6677938219703893,
133
+ "eval_f1": 0.6066566978834349,
134
+ "eval_loss": 0.8430724143981934,
135
+ "eval_precision": 0.7751300124185229,
136
+ "eval_recall": 0.6677938219703893,
137
+ "eval_runtime": 78.2201,
138
+ "eval_samples_per_second": 139.887,
139
+ "eval_steps_per_second": 2.186,
140
+ "step": 171
141
+ },
142
+ {
143
+ "epoch": 1.0526315789473684,
144
+ "grad_norm": 0.7332887053489685,
145
+ "learning_rate": 4.970760233918128e-05,
146
+ "loss": 0.8527,
147
+ "step": 180
148
+ },
149
+ {
150
+ "epoch": 1.1111111111111112,
151
+ "grad_norm": 0.776443362236023,
152
+ "learning_rate": 4.938271604938271e-05,
153
+ "loss": 0.8561,
154
+ "step": 190
155
+ },
156
+ {
157
+ "epoch": 1.1695906432748537,
158
+ "grad_norm": 0.7149679660797119,
159
+ "learning_rate": 4.9057829759584143e-05,
160
+ "loss": 0.8435,
161
+ "step": 200
162
+ },
163
+ {
164
+ "epoch": 1.2280701754385965,
165
+ "grad_norm": 0.8708255290985107,
166
+ "learning_rate": 4.8732943469785574e-05,
167
+ "loss": 0.8332,
168
+ "step": 210
169
+ },
170
+ {
171
+ "epoch": 1.286549707602339,
172
+ "grad_norm": 0.8141400814056396,
173
+ "learning_rate": 4.8408057179987004e-05,
174
+ "loss": 0.8377,
175
+ "step": 220
176
+ },
177
+ {
178
+ "epoch": 1.345029239766082,
179
+ "grad_norm": 1.188214659690857,
180
+ "learning_rate": 4.8083170890188434e-05,
181
+ "loss": 0.8456,
182
+ "step": 230
183
+ },
184
+ {
185
+ "epoch": 1.4035087719298245,
186
+ "grad_norm": 0.6589232087135315,
187
+ "learning_rate": 4.7758284600389865e-05,
188
+ "loss": 0.8387,
189
+ "step": 240
190
+ },
191
+ {
192
+ "epoch": 1.4619883040935673,
193
+ "grad_norm": 0.9304301142692566,
194
+ "learning_rate": 4.7433398310591295e-05,
195
+ "loss": 0.8521,
196
+ "step": 250
197
+ },
198
+ {
199
+ "epoch": 1.52046783625731,
200
+ "grad_norm": 0.8288267254829407,
201
+ "learning_rate": 4.7108512020792725e-05,
202
+ "loss": 0.8453,
203
+ "step": 260
204
+ },
205
+ {
206
+ "epoch": 1.5789473684210527,
207
+ "grad_norm": 0.8100181221961975,
208
+ "learning_rate": 4.678362573099415e-05,
209
+ "loss": 0.8398,
210
+ "step": 270
211
+ },
212
+ {
213
+ "epoch": 1.6374269005847952,
214
+ "grad_norm": 1.2519994974136353,
215
+ "learning_rate": 4.645873944119558e-05,
216
+ "loss": 0.835,
217
+ "step": 280
218
+ },
219
+ {
220
+ "epoch": 1.695906432748538,
221
+ "grad_norm": 0.868988573551178,
222
+ "learning_rate": 4.613385315139701e-05,
223
+ "loss": 0.8,
224
+ "step": 290
225
+ },
226
+ {
227
+ "epoch": 1.7543859649122808,
228
+ "grad_norm": 0.7534909844398499,
229
+ "learning_rate": 4.580896686159844e-05,
230
+ "loss": 0.8103,
231
+ "step": 300
232
+ },
233
+ {
234
+ "epoch": 1.8128654970760234,
235
+ "grad_norm": 0.9954193234443665,
236
+ "learning_rate": 4.548408057179987e-05,
237
+ "loss": 0.8065,
238
+ "step": 310
239
+ },
240
+ {
241
+ "epoch": 1.871345029239766,
242
+ "grad_norm": 0.6459550857543945,
243
+ "learning_rate": 4.51591942820013e-05,
244
+ "loss": 0.8196,
245
+ "step": 320
246
+ },
247
+ {
248
+ "epoch": 1.9298245614035088,
249
+ "grad_norm": 0.9698415398597717,
250
+ "learning_rate": 4.483430799220273e-05,
251
+ "loss": 0.8464,
252
+ "step": 330
253
+ },
254
+ {
255
+ "epoch": 1.9883040935672516,
256
+ "grad_norm": 0.9862537980079651,
257
+ "learning_rate": 4.450942170240416e-05,
258
+ "loss": 0.8297,
259
+ "step": 340
260
+ },
261
+ {
262
+ "epoch": 2.0,
263
+ "eval_accuracy": 0.6791263023213306,
264
+ "eval_f1": 0.6182249859592962,
265
+ "eval_loss": 0.796485424041748,
266
+ "eval_precision": 0.6757921489303887,
267
+ "eval_recall": 0.6791263023213306,
268
+ "eval_runtime": 78.3952,
269
+ "eval_samples_per_second": 139.575,
270
+ "eval_steps_per_second": 2.181,
271
+ "step": 342
272
+ },
273
+ {
274
+ "epoch": 2.046783625730994,
275
+ "grad_norm": 0.7553840279579163,
276
+ "learning_rate": 4.418453541260559e-05,
277
+ "loss": 0.8262,
278
+ "step": 350
279
+ },
280
+ {
281
+ "epoch": 2.1052631578947367,
282
+ "grad_norm": 0.8275452852249146,
283
+ "learning_rate": 4.3859649122807014e-05,
284
+ "loss": 0.809,
285
+ "step": 360
286
+ },
287
+ {
288
+ "epoch": 2.1637426900584797,
289
+ "grad_norm": 0.6450644731521606,
290
+ "learning_rate": 4.3534762833008445e-05,
291
+ "loss": 0.7939,
292
+ "step": 370
293
+ },
294
+ {
295
+ "epoch": 2.2222222222222223,
296
+ "grad_norm": 0.6809207201004028,
297
+ "learning_rate": 4.3209876543209875e-05,
298
+ "loss": 0.8157,
299
+ "step": 380
300
+ },
301
+ {
302
+ "epoch": 2.280701754385965,
303
+ "grad_norm": 0.9741197228431702,
304
+ "learning_rate": 4.2884990253411305e-05,
305
+ "loss": 0.8126,
306
+ "step": 390
307
+ },
308
+ {
309
+ "epoch": 2.3391812865497075,
310
+ "grad_norm": 0.6929029822349548,
311
+ "learning_rate": 4.2560103963612735e-05,
312
+ "loss": 0.8155,
313
+ "step": 400
314
+ },
315
+ {
316
+ "epoch": 2.39766081871345,
317
+ "grad_norm": 0.836373507976532,
318
+ "learning_rate": 4.2235217673814166e-05,
319
+ "loss": 0.8145,
320
+ "step": 410
321
+ },
322
+ {
323
+ "epoch": 2.456140350877193,
324
+ "grad_norm": 0.7719972729682922,
325
+ "learning_rate": 4.1910331384015596e-05,
326
+ "loss": 0.7963,
327
+ "step": 420
328
+ },
329
+ {
330
+ "epoch": 2.5146198830409356,
331
+ "grad_norm": 0.5631088018417358,
332
+ "learning_rate": 4.1585445094217026e-05,
333
+ "loss": 0.7839,
334
+ "step": 430
335
+ },
336
+ {
337
+ "epoch": 2.573099415204678,
338
+ "grad_norm": 0.7167489528656006,
339
+ "learning_rate": 4.1260558804418457e-05,
340
+ "loss": 0.7837,
341
+ "step": 440
342
+ },
343
+ {
344
+ "epoch": 2.6315789473684212,
345
+ "grad_norm": 0.8713414072990417,
346
+ "learning_rate": 4.093567251461988e-05,
347
+ "loss": 0.8046,
348
+ "step": 450
349
+ },
350
+ {
351
+ "epoch": 2.690058479532164,
352
+ "grad_norm": 1.0731910467147827,
353
+ "learning_rate": 4.061078622482131e-05,
354
+ "loss": 0.7813,
355
+ "step": 460
356
+ },
357
+ {
358
+ "epoch": 2.7485380116959064,
359
+ "grad_norm": 0.6702953577041626,
360
+ "learning_rate": 4.028589993502274e-05,
361
+ "loss": 0.8076,
362
+ "step": 470
363
+ },
364
+ {
365
+ "epoch": 2.807017543859649,
366
+ "grad_norm": 0.6061651706695557,
367
+ "learning_rate": 3.996101364522417e-05,
368
+ "loss": 0.786,
369
+ "step": 480
370
+ },
371
+ {
372
+ "epoch": 2.8654970760233915,
373
+ "grad_norm": 0.9429291486740112,
374
+ "learning_rate": 3.96361273554256e-05,
375
+ "loss": 0.817,
376
+ "step": 490
377
+ },
378
+ {
379
+ "epoch": 2.9239766081871346,
380
+ "grad_norm": 0.7229118347167969,
381
+ "learning_rate": 3.931124106562703e-05,
382
+ "loss": 0.8003,
383
+ "step": 500
384
+ },
385
+ {
386
+ "epoch": 2.982456140350877,
387
+ "grad_norm": 0.702900230884552,
388
+ "learning_rate": 3.898635477582846e-05,
389
+ "loss": 0.8303,
390
+ "step": 510
391
+ },
392
+ {
393
+ "epoch": 3.0,
394
+ "eval_accuracy": 0.6842441966733687,
395
+ "eval_f1": 0.6360238176585905,
396
+ "eval_loss": 0.7872248888015747,
397
+ "eval_precision": 0.6704334160874683,
398
+ "eval_recall": 0.6842441966733687,
399
+ "eval_runtime": 77.468,
400
+ "eval_samples_per_second": 141.245,
401
+ "eval_steps_per_second": 2.207,
402
+ "step": 513
403
+ },
404
+ {
405
+ "epoch": 3.0409356725146197,
406
+ "grad_norm": 0.6098015308380127,
407
+ "learning_rate": 3.866146848602989e-05,
408
+ "loss": 0.7717,
409
+ "step": 520
410
+ },
411
+ {
412
+ "epoch": 3.0994152046783627,
413
+ "grad_norm": 0.7494928240776062,
414
+ "learning_rate": 3.8336582196231315e-05,
415
+ "loss": 0.7807,
416
+ "step": 530
417
+ },
418
+ {
419
+ "epoch": 3.1578947368421053,
420
+ "grad_norm": 0.8039376735687256,
421
+ "learning_rate": 3.8011695906432746e-05,
422
+ "loss": 0.79,
423
+ "step": 540
424
+ },
425
+ {
426
+ "epoch": 3.216374269005848,
427
+ "grad_norm": 0.687044084072113,
428
+ "learning_rate": 3.7686809616634176e-05,
429
+ "loss": 0.8004,
430
+ "step": 550
431
+ },
432
+ {
433
+ "epoch": 3.2748538011695905,
434
+ "grad_norm": 0.9368821978569031,
435
+ "learning_rate": 3.7361923326835606e-05,
436
+ "loss": 0.8002,
437
+ "step": 560
438
+ },
439
+ {
440
+ "epoch": 3.3333333333333335,
441
+ "grad_norm": 1.0392132997512817,
442
+ "learning_rate": 3.7037037037037037e-05,
443
+ "loss": 0.8042,
444
+ "step": 570
445
+ },
446
+ {
447
+ "epoch": 3.391812865497076,
448
+ "grad_norm": 0.5632928013801575,
449
+ "learning_rate": 3.671215074723847e-05,
450
+ "loss": 0.7746,
451
+ "step": 580
452
+ },
453
+ {
454
+ "epoch": 3.4502923976608186,
455
+ "grad_norm": 0.6281052231788635,
456
+ "learning_rate": 3.63872644574399e-05,
457
+ "loss": 0.7691,
458
+ "step": 590
459
+ },
460
+ {
461
+ "epoch": 3.5087719298245617,
462
+ "grad_norm": 0.6072255969047546,
463
+ "learning_rate": 3.606237816764133e-05,
464
+ "loss": 0.7552,
465
+ "step": 600
466
+ },
467
+ {
468
+ "epoch": 3.5672514619883042,
469
+ "grad_norm": 0.6243124604225159,
470
+ "learning_rate": 3.573749187784276e-05,
471
+ "loss": 0.7905,
472
+ "step": 610
473
+ },
474
+ {
475
+ "epoch": 3.625730994152047,
476
+ "grad_norm": 0.8322011828422546,
477
+ "learning_rate": 3.541260558804418e-05,
478
+ "loss": 0.7772,
479
+ "step": 620
480
+ },
481
+ {
482
+ "epoch": 3.6842105263157894,
483
+ "grad_norm": 0.9370966553688049,
484
+ "learning_rate": 3.508771929824561e-05,
485
+ "loss": 0.7683,
486
+ "step": 630
487
+ },
488
+ {
489
+ "epoch": 3.742690058479532,
490
+ "grad_norm": 0.6632276177406311,
491
+ "learning_rate": 3.476283300844704e-05,
492
+ "loss": 0.7853,
493
+ "step": 640
494
+ },
495
+ {
496
+ "epoch": 3.801169590643275,
497
+ "grad_norm": 0.678115963935852,
498
+ "learning_rate": 3.443794671864847e-05,
499
+ "loss": 0.7691,
500
+ "step": 650
501
+ },
502
+ {
503
+ "epoch": 3.8596491228070176,
504
+ "grad_norm": 0.6135697960853577,
505
+ "learning_rate": 3.41130604288499e-05,
506
+ "loss": 0.7778,
507
+ "step": 660
508
+ },
509
+ {
510
+ "epoch": 3.91812865497076,
511
+ "grad_norm": 0.6042221188545227,
512
+ "learning_rate": 3.378817413905133e-05,
513
+ "loss": 0.7983,
514
+ "step": 670
515
+ },
516
+ {
517
+ "epoch": 3.976608187134503,
518
+ "grad_norm": 0.8058825135231018,
519
+ "learning_rate": 3.346328784925276e-05,
520
+ "loss": 0.7814,
521
+ "step": 680
522
+ },
523
+ {
524
+ "epoch": 4.0,
525
+ "eval_accuracy": 0.6843355876439408,
526
+ "eval_f1": 0.659727957333552,
527
+ "eval_loss": 0.7717081308364868,
528
+ "eval_precision": 0.6601378633948723,
529
+ "eval_recall": 0.6843355876439408,
530
+ "eval_runtime": 78.1449,
531
+ "eval_samples_per_second": 140.022,
532
+ "eval_steps_per_second": 2.188,
533
+ "step": 684
534
+ },
535
+ {
536
+ "epoch": 4.035087719298246,
537
+ "grad_norm": 0.966820478439331,
538
+ "learning_rate": 3.313840155945419e-05,
539
+ "loss": 0.7736,
540
+ "step": 690
541
+ },
542
+ {
543
+ "epoch": 4.093567251461988,
544
+ "grad_norm": 1.1176632642745972,
545
+ "learning_rate": 3.281351526965562e-05,
546
+ "loss": 0.7529,
547
+ "step": 700
548
+ },
549
+ {
550
+ "epoch": 4.152046783625731,
551
+ "grad_norm": 0.6780201196670532,
552
+ "learning_rate": 3.248862897985705e-05,
553
+ "loss": 0.7722,
554
+ "step": 710
555
+ },
556
+ {
557
+ "epoch": 4.2105263157894735,
558
+ "grad_norm": 0.8257865905761719,
559
+ "learning_rate": 3.216374269005848e-05,
560
+ "loss": 0.771,
561
+ "step": 720
562
+ },
563
+ {
564
+ "epoch": 4.269005847953216,
565
+ "grad_norm": 0.6105393767356873,
566
+ "learning_rate": 3.183885640025991e-05,
567
+ "loss": 0.7883,
568
+ "step": 730
569
+ },
570
+ {
571
+ "epoch": 4.3274853801169595,
572
+ "grad_norm": 0.8537980914115906,
573
+ "learning_rate": 3.151397011046134e-05,
574
+ "loss": 0.7698,
575
+ "step": 740
576
+ },
577
+ {
578
+ "epoch": 4.385964912280702,
579
+ "grad_norm": 0.8124959468841553,
580
+ "learning_rate": 3.118908382066277e-05,
581
+ "loss": 0.7737,
582
+ "step": 750
583
+ },
584
+ {
585
+ "epoch": 4.444444444444445,
586
+ "grad_norm": 0.8992810845375061,
587
+ "learning_rate": 3.08641975308642e-05,
588
+ "loss": 0.7809,
589
+ "step": 760
590
+ },
591
+ {
592
+ "epoch": 4.502923976608187,
593
+ "grad_norm": 0.6706241965293884,
594
+ "learning_rate": 3.053931124106563e-05,
595
+ "loss": 0.7629,
596
+ "step": 770
597
+ },
598
+ {
599
+ "epoch": 4.56140350877193,
600
+ "grad_norm": 0.8107186555862427,
601
+ "learning_rate": 3.0214424951267055e-05,
602
+ "loss": 0.7683,
603
+ "step": 780
604
+ },
605
+ {
606
+ "epoch": 4.619883040935672,
607
+ "grad_norm": 0.7054151296615601,
608
+ "learning_rate": 2.9889538661468486e-05,
609
+ "loss": 0.7367,
610
+ "step": 790
611
+ },
612
+ {
613
+ "epoch": 4.678362573099415,
614
+ "grad_norm": 0.7284806966781616,
615
+ "learning_rate": 2.9564652371669916e-05,
616
+ "loss": 0.7476,
617
+ "step": 800
618
+ },
619
+ {
620
+ "epoch": 4.7368421052631575,
621
+ "grad_norm": 1.1839812994003296,
622
+ "learning_rate": 2.9239766081871346e-05,
623
+ "loss": 0.7565,
624
+ "step": 810
625
+ },
626
+ {
627
+ "epoch": 4.7953216374269,
628
+ "grad_norm": 0.7781530618667603,
629
+ "learning_rate": 2.8914879792072773e-05,
630
+ "loss": 0.7737,
631
+ "step": 820
632
+ },
633
+ {
634
+ "epoch": 4.853801169590644,
635
+ "grad_norm": 0.7338679432868958,
636
+ "learning_rate": 2.8589993502274203e-05,
637
+ "loss": 0.7519,
638
+ "step": 830
639
+ },
640
+ {
641
+ "epoch": 4.912280701754386,
642
+ "grad_norm": 1.015286922454834,
643
+ "learning_rate": 2.8265107212475634e-05,
644
+ "loss": 0.8023,
645
+ "step": 840
646
+ },
647
+ {
648
+ "epoch": 4.970760233918129,
649
+ "grad_norm": 0.8456217050552368,
650
+ "learning_rate": 2.7940220922677064e-05,
651
+ "loss": 0.7768,
652
+ "step": 850
653
+ },
654
+ {
655
+ "epoch": 5.0,
656
+ "eval_accuracy": 0.6905501736428441,
657
+ "eval_f1": 0.6543647035652124,
658
+ "eval_loss": 0.7693981528282166,
659
+ "eval_precision": 0.6774590515406133,
660
+ "eval_recall": 0.6905501736428441,
661
+ "eval_runtime": 77.1215,
662
+ "eval_samples_per_second": 141.88,
663
+ "eval_steps_per_second": 2.217,
664
+ "step": 855
665
+ },
666
+ {
667
+ "epoch": 5.029239766081871,
668
+ "grad_norm": 0.624717116355896,
669
+ "learning_rate": 2.761533463287849e-05,
670
+ "loss": 0.7482,
671
+ "step": 860
672
+ },
673
+ {
674
+ "epoch": 5.087719298245614,
675
+ "grad_norm": 0.752734363079071,
676
+ "learning_rate": 2.729044834307992e-05,
677
+ "loss": 0.7259,
678
+ "step": 870
679
+ },
680
+ {
681
+ "epoch": 5.146198830409356,
682
+ "grad_norm": 0.6503344178199768,
683
+ "learning_rate": 2.696556205328135e-05,
684
+ "loss": 0.7488,
685
+ "step": 880
686
+ },
687
+ {
688
+ "epoch": 5.204678362573099,
689
+ "grad_norm": 0.8620956540107727,
690
+ "learning_rate": 2.664067576348278e-05,
691
+ "loss": 0.7704,
692
+ "step": 890
693
+ },
694
+ {
695
+ "epoch": 5.2631578947368425,
696
+ "grad_norm": 0.6938666701316833,
697
+ "learning_rate": 2.6315789473684212e-05,
698
+ "loss": 0.76,
699
+ "step": 900
700
+ },
701
+ {
702
+ "epoch": 5.321637426900585,
703
+ "grad_norm": 0.8206263184547424,
704
+ "learning_rate": 2.599090318388564e-05,
705
+ "loss": 0.7685,
706
+ "step": 910
707
+ },
708
+ {
709
+ "epoch": 5.380116959064328,
710
+ "grad_norm": 0.8919401168823242,
711
+ "learning_rate": 2.566601689408707e-05,
712
+ "loss": 0.7673,
713
+ "step": 920
714
+ },
715
+ {
716
+ "epoch": 5.43859649122807,
717
+ "grad_norm": 0.9412862062454224,
718
+ "learning_rate": 2.53411306042885e-05,
719
+ "loss": 0.7403,
720
+ "step": 930
721
+ },
722
+ {
723
+ "epoch": 5.497076023391813,
724
+ "grad_norm": 1.1093353033065796,
725
+ "learning_rate": 2.501624431448993e-05,
726
+ "loss": 0.743,
727
+ "step": 940
728
+ },
729
+ {
730
+ "epoch": 5.555555555555555,
731
+ "grad_norm": 0.6838064193725586,
732
+ "learning_rate": 2.4691358024691357e-05,
733
+ "loss": 0.7641,
734
+ "step": 950
735
+ },
736
+ {
737
+ "epoch": 5.614035087719298,
738
+ "grad_norm": 0.7546567320823669,
739
+ "learning_rate": 2.4366471734892787e-05,
740
+ "loss": 0.7648,
741
+ "step": 960
742
+ },
743
+ {
744
+ "epoch": 5.6725146198830405,
745
+ "grad_norm": 0.7909204363822937,
746
+ "learning_rate": 2.4041585445094217e-05,
747
+ "loss": 0.7872,
748
+ "step": 970
749
+ },
750
+ {
751
+ "epoch": 5.730994152046784,
752
+ "grad_norm": 0.6969336867332458,
753
+ "learning_rate": 2.3716699155295647e-05,
754
+ "loss": 0.7638,
755
+ "step": 980
756
+ },
757
+ {
758
+ "epoch": 5.7894736842105265,
759
+ "grad_norm": 0.7838913202285767,
760
+ "learning_rate": 2.3391812865497074e-05,
761
+ "loss": 0.747,
762
+ "step": 990
763
+ },
764
+ {
765
+ "epoch": 5.847953216374269,
766
+ "grad_norm": 0.8347417712211609,
767
+ "learning_rate": 2.3066926575698505e-05,
768
+ "loss": 0.7749,
769
+ "step": 1000
770
+ },
771
+ {
772
+ "epoch": 5.906432748538012,
773
+ "grad_norm": 0.9646545052528381,
774
+ "learning_rate": 2.2742040285899935e-05,
775
+ "loss": 0.737,
776
+ "step": 1010
777
+ },
778
+ {
779
+ "epoch": 5.964912280701754,
780
+ "grad_norm": 0.6134990453720093,
781
+ "learning_rate": 2.2417153996101365e-05,
782
+ "loss": 0.7415,
783
+ "step": 1020
784
+ },
785
+ {
786
+ "epoch": 6.0,
787
+ "eval_accuracy": 0.6962164138183148,
788
+ "eval_f1": 0.671796652878038,
789
+ "eval_loss": 0.7572136521339417,
790
+ "eval_precision": 0.6763761787994358,
791
+ "eval_recall": 0.6962164138183148,
792
+ "eval_runtime": 77.9563,
793
+ "eval_samples_per_second": 140.361,
794
+ "eval_steps_per_second": 2.194,
795
+ "step": 1026
796
+ },
797
+ {
798
+ "epoch": 6.023391812865497,
799
+ "grad_norm": 0.7049497961997986,
800
+ "learning_rate": 2.2092267706302795e-05,
801
+ "loss": 0.7598,
802
+ "step": 1030
803
+ },
804
+ {
805
+ "epoch": 6.081871345029239,
806
+ "grad_norm": 0.9780289530754089,
807
+ "learning_rate": 2.1767381416504222e-05,
808
+ "loss": 0.7472,
809
+ "step": 1040
810
+ },
811
+ {
812
+ "epoch": 6.140350877192983,
813
+ "grad_norm": 0.7058891654014587,
814
+ "learning_rate": 2.1442495126705653e-05,
815
+ "loss": 0.742,
816
+ "step": 1050
817
+ },
818
+ {
819
+ "epoch": 6.1988304093567255,
820
+ "grad_norm": 0.8734349012374878,
821
+ "learning_rate": 2.1117608836907083e-05,
822
+ "loss": 0.7581,
823
+ "step": 1060
824
+ },
825
+ {
826
+ "epoch": 6.257309941520468,
827
+ "grad_norm": 0.8839743733406067,
828
+ "learning_rate": 2.0792722547108513e-05,
829
+ "loss": 0.7516,
830
+ "step": 1070
831
+ },
832
+ {
833
+ "epoch": 6.315789473684211,
834
+ "grad_norm": 0.6963735222816467,
835
+ "learning_rate": 2.046783625730994e-05,
836
+ "loss": 0.7412,
837
+ "step": 1080
838
+ },
839
+ {
840
+ "epoch": 6.374269005847953,
841
+ "grad_norm": 0.9337784647941589,
842
+ "learning_rate": 2.014294996751137e-05,
843
+ "loss": 0.7402,
844
+ "step": 1090
845
+ },
846
+ {
847
+ "epoch": 6.432748538011696,
848
+ "grad_norm": 0.6648013591766357,
849
+ "learning_rate": 1.98180636777128e-05,
850
+ "loss": 0.7513,
851
+ "step": 1100
852
+ },
853
+ {
854
+ "epoch": 6.491228070175438,
855
+ "grad_norm": 1.072342872619629,
856
+ "learning_rate": 1.949317738791423e-05,
857
+ "loss": 0.7406,
858
+ "step": 1110
859
+ },
860
+ {
861
+ "epoch": 6.549707602339181,
862
+ "grad_norm": 1.0100135803222656,
863
+ "learning_rate": 1.9168291098115658e-05,
864
+ "loss": 0.7643,
865
+ "step": 1120
866
+ },
867
+ {
868
+ "epoch": 6.6081871345029235,
869
+ "grad_norm": 0.7687884569168091,
870
+ "learning_rate": 1.8843404808317088e-05,
871
+ "loss": 0.7404,
872
+ "step": 1130
873
+ },
874
+ {
875
+ "epoch": 6.666666666666667,
876
+ "grad_norm": 0.750688374042511,
877
+ "learning_rate": 1.8518518518518518e-05,
878
+ "loss": 0.7391,
879
+ "step": 1140
880
+ },
881
+ {
882
+ "epoch": 6.7251461988304095,
883
+ "grad_norm": 0.7010438442230225,
884
+ "learning_rate": 1.819363222871995e-05,
885
+ "loss": 0.7534,
886
+ "step": 1150
887
+ },
888
+ {
889
+ "epoch": 6.783625730994152,
890
+ "grad_norm": 1.0012060403823853,
891
+ "learning_rate": 1.786874593892138e-05,
892
+ "loss": 0.7485,
893
+ "step": 1160
894
+ },
895
+ {
896
+ "epoch": 6.842105263157895,
897
+ "grad_norm": 0.8860548734664917,
898
+ "learning_rate": 1.7543859649122806e-05,
899
+ "loss": 0.7606,
900
+ "step": 1170
901
+ },
902
+ {
903
+ "epoch": 6.900584795321637,
904
+ "grad_norm": 0.969633936882019,
905
+ "learning_rate": 1.7218973359324236e-05,
906
+ "loss": 0.7592,
907
+ "step": 1180
908
+ },
909
+ {
910
+ "epoch": 6.95906432748538,
911
+ "grad_norm": 0.8473331928253174,
912
+ "learning_rate": 1.6894087069525666e-05,
913
+ "loss": 0.7351,
914
+ "step": 1190
915
+ },
916
+ {
917
+ "epoch": 7.0,
918
+ "eval_accuracy": 0.692195211113142,
919
+ "eval_f1": 0.6568623393542826,
920
+ "eval_loss": 0.754936695098877,
921
+ "eval_precision": 0.6648205901494669,
922
+ "eval_recall": 0.692195211113142,
923
+ "eval_runtime": 77.9138,
924
+ "eval_samples_per_second": 140.437,
925
+ "eval_steps_per_second": 2.195,
926
+ "step": 1197
927
+ },
928
+ {
929
+ "epoch": 7.017543859649122,
930
+ "grad_norm": 0.7504809498786926,
931
+ "learning_rate": 1.6569200779727097e-05,
932
+ "loss": 0.7102,
933
+ "step": 1200
934
+ },
935
+ {
936
+ "epoch": 7.076023391812866,
937
+ "grad_norm": 1.306260585784912,
938
+ "learning_rate": 1.6244314489928523e-05,
939
+ "loss": 0.7574,
940
+ "step": 1210
941
+ },
942
+ {
943
+ "epoch": 7.1345029239766085,
944
+ "grad_norm": 0.7214799523353577,
945
+ "learning_rate": 1.5919428200129954e-05,
946
+ "loss": 0.7355,
947
+ "step": 1220
948
+ },
949
+ {
950
+ "epoch": 7.192982456140351,
951
+ "grad_norm": 0.8254335522651672,
952
+ "learning_rate": 1.5594541910331384e-05,
953
+ "loss": 0.758,
954
+ "step": 1230
955
+ },
956
+ {
957
+ "epoch": 7.251461988304094,
958
+ "grad_norm": 0.8644353747367859,
959
+ "learning_rate": 1.5269655620532814e-05,
960
+ "loss": 0.7131,
961
+ "step": 1240
962
+ },
963
+ {
964
+ "epoch": 7.309941520467836,
965
+ "grad_norm": 0.7876085638999939,
966
+ "learning_rate": 1.4944769330734243e-05,
967
+ "loss": 0.7437,
968
+ "step": 1250
969
+ },
970
+ {
971
+ "epoch": 7.368421052631579,
972
+ "grad_norm": 0.904586136341095,
973
+ "learning_rate": 1.4619883040935673e-05,
974
+ "loss": 0.7464,
975
+ "step": 1260
976
+ },
977
+ {
978
+ "epoch": 7.426900584795321,
979
+ "grad_norm": 1.033260464668274,
980
+ "learning_rate": 1.4294996751137102e-05,
981
+ "loss": 0.7226,
982
+ "step": 1270
983
+ },
984
+ {
985
+ "epoch": 7.485380116959064,
986
+ "grad_norm": 0.9057112336158752,
987
+ "learning_rate": 1.3970110461338532e-05,
988
+ "loss": 0.7425,
989
+ "step": 1280
990
+ },
991
+ {
992
+ "epoch": 7.543859649122807,
993
+ "grad_norm": 0.8631776571273804,
994
+ "learning_rate": 1.364522417153996e-05,
995
+ "loss": 0.7178,
996
+ "step": 1290
997
+ },
998
+ {
999
+ "epoch": 7.60233918128655,
1000
+ "grad_norm": 0.8566320538520813,
1001
+ "learning_rate": 1.332033788174139e-05,
1002
+ "loss": 0.7151,
1003
+ "step": 1300
1004
+ },
1005
+ {
1006
+ "epoch": 7.6608187134502925,
1007
+ "grad_norm": 1.056127905845642,
1008
+ "learning_rate": 1.299545159194282e-05,
1009
+ "loss": 0.7574,
1010
+ "step": 1310
1011
+ },
1012
+ {
1013
+ "epoch": 7.719298245614035,
1014
+ "grad_norm": 1.0582066774368286,
1015
+ "learning_rate": 1.267056530214425e-05,
1016
+ "loss": 0.7462,
1017
+ "step": 1320
1018
+ },
1019
+ {
1020
+ "epoch": 7.777777777777778,
1021
+ "grad_norm": 1.0808275938034058,
1022
+ "learning_rate": 1.2345679012345678e-05,
1023
+ "loss": 0.7181,
1024
+ "step": 1330
1025
+ },
1026
+ {
1027
+ "epoch": 7.83625730994152,
1028
+ "grad_norm": 0.8452061414718628,
1029
+ "learning_rate": 1.2020792722547109e-05,
1030
+ "loss": 0.7686,
1031
+ "step": 1340
1032
+ },
1033
+ {
1034
+ "epoch": 7.894736842105263,
1035
+ "grad_norm": 0.7253689765930176,
1036
+ "learning_rate": 1.1695906432748537e-05,
1037
+ "loss": 0.7174,
1038
+ "step": 1350
1039
+ },
1040
+ {
1041
+ "epoch": 7.953216374269006,
1042
+ "grad_norm": 0.9176128506660461,
1043
+ "learning_rate": 1.1371020142949967e-05,
1044
+ "loss": 0.7197,
1045
+ "step": 1360
1046
+ },
1047
+ {
1048
+ "epoch": 8.0,
1049
+ "eval_accuracy": 0.6985925790531895,
1050
+ "eval_f1": 0.6855055863067254,
1051
+ "eval_loss": 0.7478834390640259,
1052
+ "eval_precision": 0.6925926647987316,
1053
+ "eval_recall": 0.6985925790531895,
1054
+ "eval_runtime": 77.8555,
1055
+ "eval_samples_per_second": 140.542,
1056
+ "eval_steps_per_second": 2.196,
1057
+ "step": 1368
1058
+ },
1059
+ {
1060
+ "epoch": 8.011695906432749,
1061
+ "grad_norm": 0.7897553443908691,
1062
+ "learning_rate": 1.1046133853151398e-05,
1063
+ "loss": 0.755,
1064
+ "step": 1370
1065
+ },
1066
+ {
1067
+ "epoch": 8.070175438596491,
1068
+ "grad_norm": 0.7324469685554504,
1069
+ "learning_rate": 1.0721247563352826e-05,
1070
+ "loss": 0.7243,
1071
+ "step": 1380
1072
+ },
1073
+ {
1074
+ "epoch": 8.128654970760234,
1075
+ "grad_norm": 0.7983306646347046,
1076
+ "learning_rate": 1.0396361273554257e-05,
1077
+ "loss": 0.7294,
1078
+ "step": 1390
1079
+ },
1080
+ {
1081
+ "epoch": 8.187134502923977,
1082
+ "grad_norm": 0.9110460877418518,
1083
+ "learning_rate": 1.0071474983755685e-05,
1084
+ "loss": 0.7027,
1085
+ "step": 1400
1086
+ },
1087
+ {
1088
+ "epoch": 8.24561403508772,
1089
+ "grad_norm": 0.9574342966079712,
1090
+ "learning_rate": 9.746588693957115e-06,
1091
+ "loss": 0.7131,
1092
+ "step": 1410
1093
+ },
1094
+ {
1095
+ "epoch": 8.304093567251462,
1096
+ "grad_norm": 0.7169631719589233,
1097
+ "learning_rate": 9.421702404158544e-06,
1098
+ "loss": 0.7365,
1099
+ "step": 1420
1100
+ },
1101
+ {
1102
+ "epoch": 8.362573099415204,
1103
+ "grad_norm": 0.9551491737365723,
1104
+ "learning_rate": 9.096816114359974e-06,
1105
+ "loss": 0.7313,
1106
+ "step": 1430
1107
+ },
1108
+ {
1109
+ "epoch": 8.421052631578947,
1110
+ "grad_norm": 1.159575343132019,
1111
+ "learning_rate": 8.771929824561403e-06,
1112
+ "loss": 0.7438,
1113
+ "step": 1440
1114
+ },
1115
+ {
1116
+ "epoch": 8.47953216374269,
1117
+ "grad_norm": 0.8166360259056091,
1118
+ "learning_rate": 8.447043534762833e-06,
1119
+ "loss": 0.7355,
1120
+ "step": 1450
1121
+ },
1122
+ {
1123
+ "epoch": 8.538011695906432,
1124
+ "grad_norm": 0.8369165062904358,
1125
+ "learning_rate": 8.122157244964262e-06,
1126
+ "loss": 0.7183,
1127
+ "step": 1460
1128
+ },
1129
+ {
1130
+ "epoch": 8.596491228070175,
1131
+ "grad_norm": 0.7923627495765686,
1132
+ "learning_rate": 7.797270955165692e-06,
1133
+ "loss": 0.711,
1134
+ "step": 1470
1135
+ },
1136
+ {
1137
+ "epoch": 8.654970760233919,
1138
+ "grad_norm": 0.7623910903930664,
1139
+ "learning_rate": 7.4723846653671214e-06,
1140
+ "loss": 0.7377,
1141
+ "step": 1480
1142
+ },
1143
+ {
1144
+ "epoch": 8.713450292397662,
1145
+ "grad_norm": 1.088745355606079,
1146
+ "learning_rate": 7.147498375568551e-06,
1147
+ "loss": 0.7199,
1148
+ "step": 1490
1149
+ },
1150
+ {
1151
+ "epoch": 8.771929824561404,
1152
+ "grad_norm": 0.8672430515289307,
1153
+ "learning_rate": 6.82261208576998e-06,
1154
+ "loss": 0.7234,
1155
+ "step": 1500
1156
+ },
1157
+ {
1158
+ "epoch": 8.830409356725147,
1159
+ "grad_norm": 0.77957683801651,
1160
+ "learning_rate": 6.49772579597141e-06,
1161
+ "loss": 0.7348,
1162
+ "step": 1510
1163
+ },
1164
+ {
1165
+ "epoch": 8.88888888888889,
1166
+ "grad_norm": 0.982523500919342,
1167
+ "learning_rate": 6.172839506172839e-06,
1168
+ "loss": 0.7348,
1169
+ "step": 1520
1170
+ },
1171
+ {
1172
+ "epoch": 8.947368421052632,
1173
+ "grad_norm": 0.8758224844932556,
1174
+ "learning_rate": 5.8479532163742686e-06,
1175
+ "loss": 0.7087,
1176
+ "step": 1530
1177
+ },
1178
+ {
1179
+ "epoch": 9.0,
1180
+ "eval_accuracy": 0.6978614512886127,
1181
+ "eval_f1": 0.6697434161663234,
1182
+ "eval_loss": 0.744518518447876,
1183
+ "eval_precision": 0.6792260519903555,
1184
+ "eval_recall": 0.6978614512886127,
1185
+ "eval_runtime": 77.9794,
1186
+ "eval_samples_per_second": 140.319,
1187
+ "eval_steps_per_second": 2.193,
1188
+ "step": 1539
1189
+ },
1190
+ {
1191
+ "epoch": 9.005847953216374,
1192
+ "grad_norm": 0.7864174246788025,
1193
+ "learning_rate": 5.523066926575699e-06,
1194
+ "loss": 0.713,
1195
+ "step": 1540
1196
+ },
1197
+ {
1198
+ "epoch": 9.064327485380117,
1199
+ "grad_norm": 1.0881294012069702,
1200
+ "learning_rate": 5.198180636777128e-06,
1201
+ "loss": 0.7092,
1202
+ "step": 1550
1203
+ },
1204
+ {
1205
+ "epoch": 9.12280701754386,
1206
+ "grad_norm": 1.0221022367477417,
1207
+ "learning_rate": 4.873294346978558e-06,
1208
+ "loss": 0.7463,
1209
+ "step": 1560
1210
+ },
1211
+ {
1212
+ "epoch": 9.181286549707602,
1213
+ "grad_norm": 0.8976357579231262,
1214
+ "learning_rate": 4.548408057179987e-06,
1215
+ "loss": 0.7392,
1216
+ "step": 1570
1217
+ },
1218
+ {
1219
+ "epoch": 9.239766081871345,
1220
+ "grad_norm": 0.8547308444976807,
1221
+ "learning_rate": 4.2235217673814166e-06,
1222
+ "loss": 0.7275,
1223
+ "step": 1580
1224
+ },
1225
+ {
1226
+ "epoch": 9.298245614035087,
1227
+ "grad_norm": 0.9341883063316345,
1228
+ "learning_rate": 3.898635477582846e-06,
1229
+ "loss": 0.7183,
1230
+ "step": 1590
1231
+ },
1232
+ {
1233
+ "epoch": 9.35672514619883,
1234
+ "grad_norm": 0.9447769522666931,
1235
+ "learning_rate": 3.5737491877842754e-06,
1236
+ "loss": 0.6966,
1237
+ "step": 1600
1238
+ },
1239
+ {
1240
+ "epoch": 9.415204678362572,
1241
+ "grad_norm": 1.0918306112289429,
1242
+ "learning_rate": 3.248862897985705e-06,
1243
+ "loss": 0.7161,
1244
+ "step": 1610
1245
+ },
1246
+ {
1247
+ "epoch": 9.473684210526315,
1248
+ "grad_norm": 0.9330850839614868,
1249
+ "learning_rate": 2.9239766081871343e-06,
1250
+ "loss": 0.7093,
1251
+ "step": 1620
1252
+ },
1253
+ {
1254
+ "epoch": 9.53216374269006,
1255
+ "grad_norm": 0.8420782685279846,
1256
+ "learning_rate": 2.599090318388564e-06,
1257
+ "loss": 0.7259,
1258
+ "step": 1630
1259
+ },
1260
+ {
1261
+ "epoch": 9.590643274853802,
1262
+ "grad_norm": 0.9159696698188782,
1263
+ "learning_rate": 2.2742040285899936e-06,
1264
+ "loss": 0.7265,
1265
+ "step": 1640
1266
+ },
1267
+ {
1268
+ "epoch": 9.649122807017545,
1269
+ "grad_norm": 1.0164194107055664,
1270
+ "learning_rate": 1.949317738791423e-06,
1271
+ "loss": 0.7458,
1272
+ "step": 1650
1273
+ },
1274
+ {
1275
+ "epoch": 9.707602339181287,
1276
+ "grad_norm": 0.8425694704055786,
1277
+ "learning_rate": 1.6244314489928524e-06,
1278
+ "loss": 0.7149,
1279
+ "step": 1660
1280
+ },
1281
+ {
1282
+ "epoch": 9.76608187134503,
1283
+ "grad_norm": 1.1017402410507202,
1284
+ "learning_rate": 1.299545159194282e-06,
1285
+ "loss": 0.7101,
1286
+ "step": 1670
1287
+ },
1288
+ {
1289
+ "epoch": 9.824561403508772,
1290
+ "grad_norm": 0.8220164179801941,
1291
+ "learning_rate": 9.746588693957115e-07,
1292
+ "loss": 0.7174,
1293
+ "step": 1680
1294
+ },
1295
+ {
1296
+ "epoch": 9.883040935672515,
1297
+ "grad_norm": 0.7968518733978271,
1298
+ "learning_rate": 6.49772579597141e-07,
1299
+ "loss": 0.7236,
1300
+ "step": 1690
1301
+ },
1302
+ {
1303
+ "epoch": 9.941520467836257,
1304
+ "grad_norm": 0.7491603493690491,
1305
+ "learning_rate": 3.248862897985705e-07,
1306
+ "loss": 0.6978,
1307
+ "step": 1700
1308
+ },
1309
+ {
1310
+ "epoch": 10.0,
1311
+ "grad_norm": 0.8862149119377136,
1312
+ "learning_rate": 0.0,
1313
+ "loss": 0.6977,
1314
+ "step": 1710
1315
+ },
1316
+ {
1317
+ "epoch": 10.0,
1318
+ "eval_accuracy": 0.6991409248766222,
1319
+ "eval_f1": 0.6766571542539821,
1320
+ "eval_loss": 0.7419390082359314,
1321
+ "eval_precision": 0.6830062110035815,
1322
+ "eval_recall": 0.6991409248766222,
1323
+ "eval_runtime": 78.4178,
1324
+ "eval_samples_per_second": 139.535,
1325
+ "eval_steps_per_second": 2.181,
1326
+ "step": 1710
1327
+ },
1328
+ {
1329
+ "epoch": 10.0,
1330
+ "step": 1710,
1331
+ "total_flos": 3.3914202248568177e+19,
1332
+ "train_loss": 0.790359598014787,
1333
+ "train_runtime": 7828.8109,
1334
+ "train_samples_per_second": 55.901,
1335
+ "train_steps_per_second": 0.218
1336
+ }
1337
+ ],
1338
+ "logging_steps": 10,
1339
+ "max_steps": 1710,
1340
+ "num_input_tokens_seen": 0,
1341
+ "num_train_epochs": 10,
1342
+ "save_steps": 500,
1343
+ "stateful_callbacks": {
1344
+ "TrainerControl": {
1345
+ "args": {
1346
+ "should_epoch_stop": false,
1347
+ "should_evaluate": false,
1348
+ "should_log": false,
1349
+ "should_save": true,
1350
+ "should_training_stop": true
1351
+ },
1352
+ "attributes": {}
1353
+ }
1354
+ },
1355
+ "total_flos": 3.3914202248568177e+19,
1356
+ "train_batch_size": 64,
1357
+ "trial_name": null,
1358
+ "trial_params": null
1359
+ }