jkefeli commited on
Commit
dd92e3a
1 Parent(s): 7e685d3

Upload trainer_state.json

Browse files
Files changed (1) hide show
  1. trainer_state.json +1438 -0
trainer_state.json ADDED
@@ -0,0 +1,1438 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.9426883191871465,
3
+ "best_model_checkpoint": "model_output/prad_primary_output/prad_primary_rs0_clinicalbert_16bsize_512max_tokens_default_lr_roc_optim_20e_01-17-2023_02h-22m/checkpoint-190",
4
+ "epoch": 9.5,
5
+ "global_step": 190,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.1,
12
+ "eval_accuracy": 0.5373134328358209,
13
+ "eval_f1": 0.3574758574758575,
14
+ "eval_f1_weighted": 0.489443392428467,
15
+ "eval_loss": 0.9268761873245239,
16
+ "eval_roc_auc": 0.6107379598441218,
17
+ "eval_runtime": 1.2207,
18
+ "eval_samples_per_second": 54.886,
19
+ "eval_steps_per_second": 4.096,
20
+ "step": 2
21
+ },
22
+ {
23
+ "epoch": 0.2,
24
+ "learning_rate": 4.9500000000000004e-05,
25
+ "loss": 0.9899,
26
+ "step": 4
27
+ },
28
+ {
29
+ "epoch": 0.2,
30
+ "eval_accuracy": 0.4925373134328358,
31
+ "eval_f1": 0.22,
32
+ "eval_f1_weighted": 0.3250746268656717,
33
+ "eval_loss": 0.9124901294708252,
34
+ "eval_roc_auc": 0.5582552156247731,
35
+ "eval_runtime": 1.2158,
36
+ "eval_samples_per_second": 55.108,
37
+ "eval_steps_per_second": 4.113,
38
+ "step": 4
39
+ },
40
+ {
41
+ "epoch": 0.3,
42
+ "eval_accuracy": 0.4925373134328358,
43
+ "eval_f1": 0.22,
44
+ "eval_f1_weighted": 0.3250746268656717,
45
+ "eval_loss": 0.981939971446991,
46
+ "eval_roc_auc": 0.4301269582562523,
47
+ "eval_runtime": 1.2194,
48
+ "eval_samples_per_second": 54.945,
49
+ "eval_steps_per_second": 4.1,
50
+ "step": 6
51
+ },
52
+ {
53
+ "epoch": 0.4,
54
+ "learning_rate": 4.9e-05,
55
+ "loss": 0.9933,
56
+ "step": 8
57
+ },
58
+ {
59
+ "epoch": 0.4,
60
+ "eval_accuracy": 0.4925373134328358,
61
+ "eval_f1": 0.22,
62
+ "eval_f1_weighted": 0.3250746268656717,
63
+ "eval_loss": 0.9560447931289673,
64
+ "eval_roc_auc": 0.5123977839606143,
65
+ "eval_runtime": 1.2219,
66
+ "eval_samples_per_second": 54.832,
67
+ "eval_steps_per_second": 4.092,
68
+ "step": 8
69
+ },
70
+ {
71
+ "epoch": 0.5,
72
+ "eval_accuracy": 0.4925373134328358,
73
+ "eval_f1": 0.22,
74
+ "eval_f1_weighted": 0.3250746268656717,
75
+ "eval_loss": 0.9014336466789246,
76
+ "eval_roc_auc": 0.5345296882078375,
77
+ "eval_runtime": 1.2192,
78
+ "eval_samples_per_second": 54.954,
79
+ "eval_steps_per_second": 4.101,
80
+ "step": 10
81
+ },
82
+ {
83
+ "epoch": 0.6,
84
+ "learning_rate": 4.85e-05,
85
+ "loss": 1.0187,
86
+ "step": 12
87
+ },
88
+ {
89
+ "epoch": 0.6,
90
+ "eval_accuracy": 0.43283582089552236,
91
+ "eval_f1": 0.20138888888888887,
92
+ "eval_f1_weighted": 0.2615049751243781,
93
+ "eval_loss": 0.9236512184143066,
94
+ "eval_roc_auc": 0.4107449483209271,
95
+ "eval_runtime": 1.2182,
96
+ "eval_samples_per_second": 54.998,
97
+ "eval_steps_per_second": 4.104,
98
+ "step": 12
99
+ },
100
+ {
101
+ "epoch": 0.7,
102
+ "eval_accuracy": 0.43283582089552236,
103
+ "eval_f1": 0.20138888888888887,
104
+ "eval_f1_weighted": 0.2615049751243781,
105
+ "eval_loss": 0.9475049376487732,
106
+ "eval_roc_auc": 0.622050195769533,
107
+ "eval_runtime": 1.2161,
108
+ "eval_samples_per_second": 55.094,
109
+ "eval_steps_per_second": 4.112,
110
+ "step": 14
111
+ },
112
+ {
113
+ "epoch": 0.8,
114
+ "learning_rate": 4.8e-05,
115
+ "loss": 0.9181,
116
+ "step": 16
117
+ },
118
+ {
119
+ "epoch": 0.8,
120
+ "eval_accuracy": 0.43283582089552236,
121
+ "eval_f1": 0.20138888888888887,
122
+ "eval_f1_weighted": 0.2615049751243781,
123
+ "eval_loss": 0.9717263579368591,
124
+ "eval_roc_auc": 0.6353464355620174,
125
+ "eval_runtime": 1.2189,
126
+ "eval_samples_per_second": 54.968,
127
+ "eval_steps_per_second": 4.102,
128
+ "step": 16
129
+ },
130
+ {
131
+ "epoch": 0.9,
132
+ "eval_accuracy": 0.43283582089552236,
133
+ "eval_f1": 0.20138888888888887,
134
+ "eval_f1_weighted": 0.2615049751243781,
135
+ "eval_loss": 0.9334698915481567,
136
+ "eval_roc_auc": 0.6309403000759409,
137
+ "eval_runtime": 1.2236,
138
+ "eval_samples_per_second": 54.756,
139
+ "eval_steps_per_second": 4.086,
140
+ "step": 18
141
+ },
142
+ {
143
+ "epoch": 1.0,
144
+ "learning_rate": 4.75e-05,
145
+ "loss": 0.8413,
146
+ "step": 20
147
+ },
148
+ {
149
+ "epoch": 1.0,
150
+ "eval_accuracy": 0.4925373134328358,
151
+ "eval_f1": 0.282216999608304,
152
+ "eval_f1_weighted": 0.3793693109072733,
153
+ "eval_loss": 0.9048787355422974,
154
+ "eval_roc_auc": 0.6341481396372107,
155
+ "eval_runtime": 1.2194,
156
+ "eval_samples_per_second": 54.947,
157
+ "eval_steps_per_second": 4.101,
158
+ "step": 20
159
+ },
160
+ {
161
+ "epoch": 1.1,
162
+ "eval_accuracy": 0.5522388059701493,
163
+ "eval_f1": 0.33274578926752846,
164
+ "eval_f1_weighted": 0.47230650507743316,
165
+ "eval_loss": 0.8940262198448181,
166
+ "eval_roc_auc": 0.6367384434369768,
167
+ "eval_runtime": 1.2185,
168
+ "eval_samples_per_second": 54.986,
169
+ "eval_steps_per_second": 4.103,
170
+ "step": 22
171
+ },
172
+ {
173
+ "epoch": 1.2,
174
+ "learning_rate": 4.7e-05,
175
+ "loss": 0.8928,
176
+ "step": 24
177
+ },
178
+ {
179
+ "epoch": 1.2,
180
+ "eval_accuracy": 0.4925373134328358,
181
+ "eval_f1": 0.239192451174018,
182
+ "eval_f1_weighted": 0.34958191268804106,
183
+ "eval_loss": 0.8931290507316589,
184
+ "eval_roc_auc": 0.6255647978074341,
185
+ "eval_runtime": 1.2132,
186
+ "eval_samples_per_second": 55.227,
187
+ "eval_steps_per_second": 4.121,
188
+ "step": 24
189
+ },
190
+ {
191
+ "epoch": 1.3,
192
+ "eval_accuracy": 0.5522388059701493,
193
+ "eval_f1": 0.3825320512820512,
194
+ "eval_f1_weighted": 0.52978616532721,
195
+ "eval_loss": 0.8918970823287964,
196
+ "eval_roc_auc": 0.6465011472546505,
197
+ "eval_runtime": 1.2216,
198
+ "eval_samples_per_second": 54.848,
199
+ "eval_steps_per_second": 4.093,
200
+ "step": 26
201
+ },
202
+ {
203
+ "epoch": 1.4,
204
+ "learning_rate": 4.6500000000000005e-05,
205
+ "loss": 0.9183,
206
+ "step": 28
207
+ },
208
+ {
209
+ "epoch": 1.4,
210
+ "eval_accuracy": 0.44776119402985076,
211
+ "eval_f1": 0.22311661506707947,
212
+ "eval_f1_weighted": 0.2932304422161638,
213
+ "eval_loss": 0.9188483357429504,
214
+ "eval_roc_auc": 0.6651756826192129,
215
+ "eval_runtime": 1.2181,
216
+ "eval_samples_per_second": 55.005,
217
+ "eval_steps_per_second": 4.105,
218
+ "step": 28
219
+ },
220
+ {
221
+ "epoch": 1.5,
222
+ "eval_accuracy": 0.5074626865671642,
223
+ "eval_f1": 0.3401191658391261,
224
+ "eval_f1_weighted": 0.46642902666409763,
225
+ "eval_loss": 0.8897700309753418,
226
+ "eval_roc_auc": 0.6640330788365928,
227
+ "eval_runtime": 1.2183,
228
+ "eval_samples_per_second": 54.996,
229
+ "eval_steps_per_second": 4.104,
230
+ "step": 30
231
+ },
232
+ {
233
+ "epoch": 1.6,
234
+ "learning_rate": 4.600000000000001e-05,
235
+ "loss": 1.0359,
236
+ "step": 32
237
+ },
238
+ {
239
+ "epoch": 1.6,
240
+ "eval_accuracy": 0.5223880597014925,
241
+ "eval_f1": 0.3450960041515309,
242
+ "eval_f1_weighted": 0.4845130858421953,
243
+ "eval_loss": 0.8961546421051025,
244
+ "eval_roc_auc": 0.5583101007352929,
245
+ "eval_runtime": 1.2172,
246
+ "eval_samples_per_second": 55.043,
247
+ "eval_steps_per_second": 4.108,
248
+ "step": 32
249
+ },
250
+ {
251
+ "epoch": 1.7,
252
+ "eval_accuracy": 0.4925373134328358,
253
+ "eval_f1": 0.22,
254
+ "eval_f1_weighted": 0.3250746268656717,
255
+ "eval_loss": 0.9399242997169495,
256
+ "eval_roc_auc": 0.4854997107034975,
257
+ "eval_runtime": 1.2176,
258
+ "eval_samples_per_second": 55.026,
259
+ "eval_steps_per_second": 4.106,
260
+ "step": 34
261
+ },
262
+ {
263
+ "epoch": 1.8,
264
+ "learning_rate": 4.55e-05,
265
+ "loss": 0.8073,
266
+ "step": 36
267
+ },
268
+ {
269
+ "epoch": 1.8,
270
+ "eval_accuracy": 0.4925373134328358,
271
+ "eval_f1": 0.22,
272
+ "eval_f1_weighted": 0.3250746268656717,
273
+ "eval_loss": 0.9720465540885925,
274
+ "eval_roc_auc": 0.5332854471353733,
275
+ "eval_runtime": 1.2149,
276
+ "eval_samples_per_second": 55.149,
277
+ "eval_steps_per_second": 4.116,
278
+ "step": 36
279
+ },
280
+ {
281
+ "epoch": 1.9,
282
+ "eval_accuracy": 0.47761194029850745,
283
+ "eval_f1": 0.23389175257731962,
284
+ "eval_f1_weighted": 0.3418699030620096,
285
+ "eval_loss": 0.9526171684265137,
286
+ "eval_roc_auc": 0.5305002138807847,
287
+ "eval_runtime": 1.2175,
288
+ "eval_samples_per_second": 55.033,
289
+ "eval_steps_per_second": 4.107,
290
+ "step": 38
291
+ },
292
+ {
293
+ "epoch": 2.0,
294
+ "learning_rate": 4.5e-05,
295
+ "loss": 0.9176,
296
+ "step": 40
297
+ },
298
+ {
299
+ "epoch": 2.0,
300
+ "eval_accuracy": 0.5671641791044776,
301
+ "eval_f1": 0.3499132446500868,
302
+ "eval_f1_weighted": 0.4950407016392877,
303
+ "eval_loss": 0.9059419631958008,
304
+ "eval_roc_auc": 0.5067589980465311,
305
+ "eval_runtime": 1.215,
306
+ "eval_samples_per_second": 55.143,
307
+ "eval_steps_per_second": 4.115,
308
+ "step": 40
309
+ },
310
+ {
311
+ "epoch": 2.1,
312
+ "eval_accuracy": 0.5373134328358209,
313
+ "eval_f1": 0.37104798587706633,
314
+ "eval_f1_weighted": 0.5126222522438363,
315
+ "eval_loss": 0.8836531043052673,
316
+ "eval_roc_auc": 0.658280560593416,
317
+ "eval_runtime": 1.2194,
318
+ "eval_samples_per_second": 54.946,
319
+ "eval_steps_per_second": 4.1,
320
+ "step": 42
321
+ },
322
+ {
323
+ "epoch": 2.2,
324
+ "learning_rate": 4.4500000000000004e-05,
325
+ "loss": 0.7806,
326
+ "step": 44
327
+ },
328
+ {
329
+ "epoch": 2.2,
330
+ "eval_accuracy": 0.5074626865671642,
331
+ "eval_f1": 0.3503450489488043,
332
+ "eval_f1_weighted": 0.48381347954498094,
333
+ "eval_loss": 0.8884266018867493,
334
+ "eval_roc_auc": 0.6245352619442165,
335
+ "eval_runtime": 1.2159,
336
+ "eval_samples_per_second": 55.104,
337
+ "eval_steps_per_second": 4.112,
338
+ "step": 44
339
+ },
340
+ {
341
+ "epoch": 2.3,
342
+ "eval_accuracy": 0.5223880597014925,
343
+ "eval_f1": 0.36190017653667145,
344
+ "eval_f1_weighted": 0.5020084938811,
345
+ "eval_loss": 0.8871902823448181,
346
+ "eval_roc_auc": 0.5753152423966572,
347
+ "eval_runtime": 1.2182,
348
+ "eval_samples_per_second": 54.998,
349
+ "eval_steps_per_second": 4.104,
350
+ "step": 46
351
+ },
352
+ {
353
+ "epoch": 2.4,
354
+ "learning_rate": 4.4000000000000006e-05,
355
+ "loss": 0.8738,
356
+ "step": 48
357
+ },
358
+ {
359
+ "epoch": 2.4,
360
+ "eval_accuracy": 0.5373134328358209,
361
+ "eval_f1": 0.3720668595306975,
362
+ "eval_f1_weighted": 0.516494192489817,
363
+ "eval_loss": 0.8924217224121094,
364
+ "eval_roc_auc": 0.5522660805668758,
365
+ "eval_runtime": 1.216,
366
+ "eval_samples_per_second": 55.099,
367
+ "eval_steps_per_second": 4.112,
368
+ "step": 48
369
+ },
370
+ {
371
+ "epoch": 2.5,
372
+ "eval_accuracy": 0.5074626865671642,
373
+ "eval_f1": 0.35161135161135154,
374
+ "eval_f1_weighted": 0.4873252783700545,
375
+ "eval_loss": 0.9068598747253418,
376
+ "eval_roc_auc": 0.545635856249989,
377
+ "eval_runtime": 1.2169,
378
+ "eval_samples_per_second": 55.059,
379
+ "eval_steps_per_second": 4.109,
380
+ "step": 50
381
+ },
382
+ {
383
+ "epoch": 2.6,
384
+ "learning_rate": 4.35e-05,
385
+ "loss": 0.8057,
386
+ "step": 52
387
+ },
388
+ {
389
+ "epoch": 2.6,
390
+ "eval_accuracy": 0.5373134328358209,
391
+ "eval_f1": 0.3625316455696203,
392
+ "eval_f1_weighted": 0.507024371811827,
393
+ "eval_loss": 0.9044484496116638,
394
+ "eval_roc_auc": 0.5214351158888778,
395
+ "eval_runtime": 1.218,
396
+ "eval_samples_per_second": 55.009,
397
+ "eval_steps_per_second": 4.105,
398
+ "step": 52
399
+ },
400
+ {
401
+ "epoch": 2.7,
402
+ "eval_accuracy": 0.5223880597014925,
403
+ "eval_f1": 0.3617433414043583,
404
+ "eval_f1_weighted": 0.502135810053847,
405
+ "eval_loss": 0.9086884260177612,
406
+ "eval_roc_auc": 0.5436828813590747,
407
+ "eval_runtime": 1.2157,
408
+ "eval_samples_per_second": 55.111,
409
+ "eval_steps_per_second": 4.113,
410
+ "step": 54
411
+ },
412
+ {
413
+ "epoch": 2.8,
414
+ "learning_rate": 4.3e-05,
415
+ "loss": 0.8282,
416
+ "step": 56
417
+ },
418
+ {
419
+ "epoch": 2.8,
420
+ "eval_accuracy": 0.5522388059701493,
421
+ "eval_f1": 0.3738271604938272,
422
+ "eval_f1_weighted": 0.5141625207296849,
423
+ "eval_loss": 0.8964908123016357,
424
+ "eval_roc_auc": 0.6378891801507918,
425
+ "eval_runtime": 1.22,
426
+ "eval_samples_per_second": 54.917,
427
+ "eval_steps_per_second": 4.098,
428
+ "step": 56
429
+ },
430
+ {
431
+ "epoch": 2.9,
432
+ "eval_accuracy": 0.5223880597014925,
433
+ "eval_f1": 0.3606237816764133,
434
+ "eval_f1_weighted": 0.501440167583137,
435
+ "eval_loss": 0.8872122168540955,
436
+ "eval_roc_auc": 0.5484830326635979,
437
+ "eval_runtime": 1.2197,
438
+ "eval_samples_per_second": 54.931,
439
+ "eval_steps_per_second": 4.099,
440
+ "step": 58
441
+ },
442
+ {
443
+ "epoch": 3.0,
444
+ "learning_rate": 4.25e-05,
445
+ "loss": 0.8939,
446
+ "step": 60
447
+ },
448
+ {
449
+ "epoch": 3.0,
450
+ "eval_accuracy": 0.5074626865671642,
451
+ "eval_f1": 0.2615979381443299,
452
+ "eval_f1_weighted": 0.37907755039236807,
453
+ "eval_loss": 1.0024551153182983,
454
+ "eval_roc_auc": 0.5364163299657502,
455
+ "eval_runtime": 1.2167,
456
+ "eval_samples_per_second": 55.066,
457
+ "eval_steps_per_second": 4.109,
458
+ "step": 60
459
+ },
460
+ {
461
+ "epoch": 3.1,
462
+ "eval_accuracy": 0.4925373134328358,
463
+ "eval_f1": 0.2556818181818182,
464
+ "eval_f1_weighted": 0.37056196291270915,
465
+ "eval_loss": 1.0271281003952026,
466
+ "eval_roc_auc": 0.5599080164786127,
467
+ "eval_runtime": 1.219,
468
+ "eval_samples_per_second": 54.964,
469
+ "eval_steps_per_second": 4.102,
470
+ "step": 62
471
+ },
472
+ {
473
+ "epoch": 3.2,
474
+ "learning_rate": 4.2e-05,
475
+ "loss": 0.8565,
476
+ "step": 64
477
+ },
478
+ {
479
+ "epoch": 3.2,
480
+ "eval_accuracy": 0.5671641791044776,
481
+ "eval_f1": 0.3650793650793651,
482
+ "eval_f1_weighted": 0.513859275053305,
483
+ "eval_loss": 0.9313604235649109,
484
+ "eval_roc_auc": 0.559152227843963,
485
+ "eval_runtime": 1.2182,
486
+ "eval_samples_per_second": 55.001,
487
+ "eval_steps_per_second": 4.105,
488
+ "step": 64
489
+ },
490
+ {
491
+ "epoch": 3.3,
492
+ "eval_accuracy": 0.5522388059701493,
493
+ "eval_f1": 0.3823717948717949,
494
+ "eval_f1_weighted": 0.5314150401836969,
495
+ "eval_loss": 0.8814096450805664,
496
+ "eval_roc_auc": 0.6012026058913381,
497
+ "eval_runtime": 1.2165,
498
+ "eval_samples_per_second": 55.076,
499
+ "eval_steps_per_second": 4.11,
500
+ "step": 66
501
+ },
502
+ {
503
+ "epoch": 3.4,
504
+ "learning_rate": 4.15e-05,
505
+ "loss": 0.6669,
506
+ "step": 68
507
+ },
508
+ {
509
+ "epoch": 3.4,
510
+ "eval_accuracy": 0.5671641791044776,
511
+ "eval_f1": 0.38861709067188527,
512
+ "eval_f1_weighted": 0.5366051347956889,
513
+ "eval_loss": 0.9118794798851013,
514
+ "eval_roc_auc": 0.5886753942271424,
515
+ "eval_runtime": 1.215,
516
+ "eval_samples_per_second": 55.144,
517
+ "eval_steps_per_second": 4.115,
518
+ "step": 68
519
+ },
520
+ {
521
+ "epoch": 3.5,
522
+ "eval_accuracy": 0.5671641791044776,
523
+ "eval_f1": 0.3925533622211523,
524
+ "eval_f1_weighted": 0.5453761524586983,
525
+ "eval_loss": 0.895846962928772,
526
+ "eval_roc_auc": 0.6272478032128417,
527
+ "eval_runtime": 1.218,
528
+ "eval_samples_per_second": 55.007,
529
+ "eval_steps_per_second": 4.105,
530
+ "step": 70
531
+ },
532
+ {
533
+ "epoch": 3.6,
534
+ "learning_rate": 4.1e-05,
535
+ "loss": 0.7601,
536
+ "step": 72
537
+ },
538
+ {
539
+ "epoch": 3.6,
540
+ "eval_accuracy": 0.5671641791044776,
541
+ "eval_f1": 0.3917874396135265,
542
+ "eval_f1_weighted": 0.545078953060783,
543
+ "eval_loss": 0.9002715945243835,
544
+ "eval_roc_auc": 0.6277288949652808,
545
+ "eval_runtime": 1.2172,
546
+ "eval_samples_per_second": 55.044,
547
+ "eval_steps_per_second": 4.108,
548
+ "step": 72
549
+ },
550
+ {
551
+ "epoch": 3.7,
552
+ "eval_accuracy": 0.582089552238806,
553
+ "eval_f1": 0.4026036644165863,
554
+ "eval_f1_weighted": 0.5596583140229422,
555
+ "eval_loss": 0.8845404386520386,
556
+ "eval_roc_auc": 0.6422917674804015,
557
+ "eval_runtime": 1.2198,
558
+ "eval_samples_per_second": 54.928,
559
+ "eval_steps_per_second": 4.099,
560
+ "step": 74
561
+ },
562
+ {
563
+ "epoch": 3.8,
564
+ "learning_rate": 4.05e-05,
565
+ "loss": 0.792,
566
+ "step": 76
567
+ },
568
+ {
569
+ "epoch": 3.8,
570
+ "eval_accuracy": 0.6119402985074627,
571
+ "eval_f1": 0.4237179487179487,
572
+ "eval_f1_weighted": 0.5875143513203215,
573
+ "eval_loss": 0.8394505381584167,
574
+ "eval_roc_auc": 0.6545318671933383,
575
+ "eval_runtime": 1.22,
576
+ "eval_samples_per_second": 54.917,
577
+ "eval_steps_per_second": 4.098,
578
+ "step": 76
579
+ },
580
+ {
581
+ "epoch": 3.9,
582
+ "eval_accuracy": 0.5671641791044776,
583
+ "eval_f1": 0.3828202581926514,
584
+ "eval_f1_weighted": 0.5263824867717025,
585
+ "eval_loss": 0.8603011965751648,
586
+ "eval_roc_auc": 0.7116721198700833,
587
+ "eval_runtime": 1.2172,
588
+ "eval_samples_per_second": 55.046,
589
+ "eval_steps_per_second": 4.108,
590
+ "step": 78
591
+ },
592
+ {
593
+ "epoch": 4.0,
594
+ "learning_rate": 4e-05,
595
+ "loss": 0.5152,
596
+ "step": 80
597
+ },
598
+ {
599
+ "epoch": 4.0,
600
+ "eval_accuracy": 0.6567164179104478,
601
+ "eval_f1": 0.4573852051219312,
602
+ "eval_f1_weighted": 0.633066621869853,
603
+ "eval_loss": 0.7896729111671448,
604
+ "eval_roc_auc": 0.7033958257608891,
605
+ "eval_runtime": 1.2172,
606
+ "eval_samples_per_second": 55.045,
607
+ "eval_steps_per_second": 4.108,
608
+ "step": 80
609
+ },
610
+ {
611
+ "epoch": 4.1,
612
+ "eval_accuracy": 0.7014925373134329,
613
+ "eval_f1": 0.4861111111111111,
614
+ "eval_f1_weighted": 0.6759950248756219,
615
+ "eval_loss": 0.7801089286804199,
616
+ "eval_roc_auc": 0.6904540328850794,
617
+ "eval_runtime": 1.2191,
618
+ "eval_samples_per_second": 54.96,
619
+ "eval_steps_per_second": 4.101,
620
+ "step": 82
621
+ },
622
+ {
623
+ "epoch": 4.2,
624
+ "learning_rate": 3.9500000000000005e-05,
625
+ "loss": 0.403,
626
+ "step": 84
627
+ },
628
+ {
629
+ "epoch": 4.2,
630
+ "eval_accuracy": 0.6268656716417911,
631
+ "eval_f1": 0.4164096813381928,
632
+ "eval_f1_weighted": 0.5832569699860664,
633
+ "eval_loss": 0.9066091775894165,
634
+ "eval_roc_auc": 0.666676041452625,
635
+ "eval_runtime": 1.2185,
636
+ "eval_samples_per_second": 54.988,
637
+ "eval_steps_per_second": 4.104,
638
+ "step": 84
639
+ },
640
+ {
641
+ "epoch": 4.3,
642
+ "eval_accuracy": 0.6417910447761194,
643
+ "eval_f1": 0.4350649350649351,
644
+ "eval_f1_weighted": 0.6075789881760031,
645
+ "eval_loss": 0.8394883275032043,
646
+ "eval_roc_auc": 0.679015649561526,
647
+ "eval_runtime": 1.2195,
648
+ "eval_samples_per_second": 54.939,
649
+ "eval_steps_per_second": 4.1,
650
+ "step": 86
651
+ },
652
+ {
653
+ "epoch": 4.4,
654
+ "learning_rate": 3.9000000000000006e-05,
655
+ "loss": 0.7288,
656
+ "step": 88
657
+ },
658
+ {
659
+ "epoch": 4.4,
660
+ "eval_accuracy": 0.7164179104477612,
661
+ "eval_f1": 0.5004887585532747,
662
+ "eval_f1_weighted": 0.693307655272027,
663
+ "eval_loss": 0.7547105550765991,
664
+ "eval_roc_auc": 0.7260807241077717,
665
+ "eval_runtime": 1.2195,
666
+ "eval_samples_per_second": 54.941,
667
+ "eval_steps_per_second": 4.1,
668
+ "step": 88
669
+ },
670
+ {
671
+ "epoch": 4.5,
672
+ "eval_accuracy": 0.6716417910447762,
673
+ "eval_f1": 0.4660098522167488,
674
+ "eval_f1_weighted": 0.6442320417616352,
675
+ "eval_loss": 0.8313462138175964,
676
+ "eval_roc_auc": 0.7827001735083298,
677
+ "eval_runtime": 1.219,
678
+ "eval_samples_per_second": 54.964,
679
+ "eval_steps_per_second": 4.102,
680
+ "step": 90
681
+ },
682
+ {
683
+ "epoch": 4.6,
684
+ "learning_rate": 3.85e-05,
685
+ "loss": 0.5865,
686
+ "step": 92
687
+ },
688
+ {
689
+ "epoch": 4.6,
690
+ "eval_accuracy": 0.7611940298507462,
691
+ "eval_f1": 0.53125,
692
+ "eval_f1_weighted": 0.7364738805970149,
693
+ "eval_loss": 0.692564845085144,
694
+ "eval_roc_auc": 0.8177031301314561,
695
+ "eval_runtime": 1.2207,
696
+ "eval_samples_per_second": 54.885,
697
+ "eval_steps_per_second": 4.096,
698
+ "step": 92
699
+ },
700
+ {
701
+ "epoch": 4.7,
702
+ "eval_accuracy": 0.7761194029850746,
703
+ "eval_f1": 0.5424836601307189,
704
+ "eval_f1_weighted": 0.7518290898448932,
705
+ "eval_loss": 0.6342676877975464,
706
+ "eval_roc_auc": 0.8405736805475765,
707
+ "eval_runtime": 1.2164,
708
+ "eval_samples_per_second": 55.083,
709
+ "eval_steps_per_second": 4.111,
710
+ "step": 94
711
+ },
712
+ {
713
+ "epoch": 4.8,
714
+ "learning_rate": 3.8e-05,
715
+ "loss": 0.5263,
716
+ "step": 96
717
+ },
718
+ {
719
+ "epoch": 4.8,
720
+ "eval_accuracy": 0.7910447761194029,
721
+ "eval_f1": 0.553002223869533,
722
+ "eval_f1_weighted": 0.7668477479171968,
723
+ "eval_loss": 0.6138404011726379,
724
+ "eval_roc_auc": 0.8259139155322393,
725
+ "eval_runtime": 1.2175,
726
+ "eval_samples_per_second": 55.032,
727
+ "eval_steps_per_second": 4.107,
728
+ "step": 96
729
+ },
730
+ {
731
+ "epoch": 4.9,
732
+ "eval_accuracy": 0.8059701492537313,
733
+ "eval_f1": 0.5584795321637427,
734
+ "eval_f1_weighted": 0.7749410840534171,
735
+ "eval_loss": 0.5968928337097168,
736
+ "eval_roc_auc": 0.8748494390242875,
737
+ "eval_runtime": 1.2175,
738
+ "eval_samples_per_second": 55.029,
739
+ "eval_steps_per_second": 4.107,
740
+ "step": 98
741
+ },
742
+ {
743
+ "epoch": 5.0,
744
+ "learning_rate": 3.7500000000000003e-05,
745
+ "loss": 0.5847,
746
+ "step": 100
747
+ },
748
+ {
749
+ "epoch": 5.0,
750
+ "eval_accuracy": 0.8059701492537313,
751
+ "eval_f1": 0.5584795321637427,
752
+ "eval_f1_weighted": 0.7749410840534171,
753
+ "eval_loss": 0.5527657866477966,
754
+ "eval_roc_auc": 0.9323773138806697,
755
+ "eval_runtime": 1.2195,
756
+ "eval_samples_per_second": 54.942,
757
+ "eval_steps_per_second": 4.1,
758
+ "step": 100
759
+ },
760
+ {
761
+ "epoch": 5.1,
762
+ "eval_accuracy": 0.8059701492537313,
763
+ "eval_f1": 0.5584795321637427,
764
+ "eval_f1_weighted": 0.7749410840534171,
765
+ "eval_loss": 0.5388075709342957,
766
+ "eval_roc_auc": 0.9383424432250612,
767
+ "eval_runtime": 1.2201,
768
+ "eval_samples_per_second": 54.913,
769
+ "eval_steps_per_second": 4.098,
770
+ "step": 102
771
+ },
772
+ {
773
+ "epoch": 5.2,
774
+ "learning_rate": 3.7e-05,
775
+ "loss": 0.3932,
776
+ "step": 104
777
+ },
778
+ {
779
+ "epoch": 5.2,
780
+ "eval_accuracy": 0.8059701492537313,
781
+ "eval_f1": 0.5584795321637427,
782
+ "eval_f1_weighted": 0.7749410840534171,
783
+ "eval_loss": 0.5338171124458313,
784
+ "eval_roc_auc": 0.9357987315897205,
785
+ "eval_runtime": 1.2223,
786
+ "eval_samples_per_second": 54.817,
787
+ "eval_steps_per_second": 4.091,
788
+ "step": 104
789
+ },
790
+ {
791
+ "epoch": 5.3,
792
+ "eval_accuracy": 0.7910447761194029,
793
+ "eval_f1": 0.5490581490581491,
794
+ "eval_f1_weighted": 0.7613627195716748,
795
+ "eval_loss": 0.5593022704124451,
796
+ "eval_roc_auc": 0.9120323481946476,
797
+ "eval_runtime": 1.2192,
798
+ "eval_samples_per_second": 54.954,
799
+ "eval_steps_per_second": 4.101,
800
+ "step": 106
801
+ },
802
+ {
803
+ "epoch": 5.4,
804
+ "learning_rate": 3.65e-05,
805
+ "loss": 0.2978,
806
+ "step": 108
807
+ },
808
+ {
809
+ "epoch": 5.4,
810
+ "eval_accuracy": 0.7761194029850746,
811
+ "eval_f1": 0.5382716049382715,
812
+ "eval_f1_weighted": 0.7467108899944721,
813
+ "eval_loss": 0.5857189297676086,
814
+ "eval_roc_auc": 0.9198113925251405,
815
+ "eval_runtime": 1.2191,
816
+ "eval_samples_per_second": 54.96,
817
+ "eval_steps_per_second": 4.101,
818
+ "step": 108
819
+ },
820
+ {
821
+ "epoch": 5.5,
822
+ "eval_accuracy": 0.7761194029850746,
823
+ "eval_f1": 0.6409472880061116,
824
+ "eval_f1_weighted": 0.7609431832434467,
825
+ "eval_loss": 0.6259031295776367,
826
+ "eval_roc_auc": 0.9239536321347209,
827
+ "eval_runtime": 1.2186,
828
+ "eval_samples_per_second": 54.981,
829
+ "eval_steps_per_second": 4.103,
830
+ "step": 110
831
+ },
832
+ {
833
+ "epoch": 5.6,
834
+ "learning_rate": 3.6e-05,
835
+ "loss": 0.2672,
836
+ "step": 112
837
+ },
838
+ {
839
+ "epoch": 5.6,
840
+ "eval_accuracy": 0.8059701492537313,
841
+ "eval_f1": 0.6626771920889568,
842
+ "eval_f1_weighted": 0.7907103139763368,
843
+ "eval_loss": 0.6301503777503967,
844
+ "eval_roc_auc": 0.9190969468704965,
845
+ "eval_runtime": 1.2171,
846
+ "eval_samples_per_second": 55.05,
847
+ "eval_steps_per_second": 4.108,
848
+ "step": 112
849
+ },
850
+ {
851
+ "epoch": 5.7,
852
+ "eval_accuracy": 0.7910447761194029,
853
+ "eval_f1": 0.6517503805175039,
854
+ "eval_f1_weighted": 0.7759694677298439,
855
+ "eval_loss": 0.5792377591133118,
856
+ "eval_roc_auc": 0.876367107295521,
857
+ "eval_runtime": 1.2176,
858
+ "eval_samples_per_second": 55.025,
859
+ "eval_steps_per_second": 4.106,
860
+ "step": 114
861
+ },
862
+ {
863
+ "epoch": 5.8,
864
+ "learning_rate": 3.55e-05,
865
+ "loss": 0.2806,
866
+ "step": 116
867
+ },
868
+ {
869
+ "epoch": 5.8,
870
+ "eval_accuracy": 0.7910447761194029,
871
+ "eval_f1": 0.6416725894337836,
872
+ "eval_f1_weighted": 0.778988851053899,
873
+ "eval_loss": 0.5505416393280029,
874
+ "eval_roc_auc": 0.8766408197382413,
875
+ "eval_runtime": 1.2235,
876
+ "eval_samples_per_second": 54.762,
877
+ "eval_steps_per_second": 4.087,
878
+ "step": 116
879
+ },
880
+ {
881
+ "epoch": 5.9,
882
+ "eval_accuracy": 0.7014925373134329,
883
+ "eval_f1": 0.6693895476504172,
884
+ "eval_f1_weighted": 0.6933592905040018,
885
+ "eval_loss": 0.8218042254447937,
886
+ "eval_roc_auc": 0.8531633397655846,
887
+ "eval_runtime": 1.2201,
888
+ "eval_samples_per_second": 54.913,
889
+ "eval_steps_per_second": 4.098,
890
+ "step": 118
891
+ },
892
+ {
893
+ "epoch": 6.0,
894
+ "learning_rate": 3.5e-05,
895
+ "loss": 0.5082,
896
+ "step": 120
897
+ },
898
+ {
899
+ "epoch": 6.0,
900
+ "eval_accuracy": 0.746268656716418,
901
+ "eval_f1": 0.7028011204481793,
902
+ "eval_f1_weighted": 0.7403110497930515,
903
+ "eval_loss": 0.7941116094589233,
904
+ "eval_roc_auc": 0.882679465493187,
905
+ "eval_runtime": 1.2184,
906
+ "eval_samples_per_second": 54.989,
907
+ "eval_steps_per_second": 4.104,
908
+ "step": 120
909
+ },
910
+ {
911
+ "epoch": 6.1,
912
+ "eval_accuracy": 0.8507462686567164,
913
+ "eval_f1": 0.7489281210592686,
914
+ "eval_f1_weighted": 0.8448194086314957,
915
+ "eval_loss": 0.47939425706863403,
916
+ "eval_roc_auc": 0.9171689795255024,
917
+ "eval_runtime": 1.2187,
918
+ "eval_samples_per_second": 54.976,
919
+ "eval_steps_per_second": 4.103,
920
+ "step": 122
921
+ },
922
+ {
923
+ "epoch": 6.2,
924
+ "learning_rate": 3.45e-05,
925
+ "loss": 0.2646,
926
+ "step": 124
927
+ },
928
+ {
929
+ "epoch": 6.2,
930
+ "eval_accuracy": 0.835820895522388,
931
+ "eval_f1": 0.6841817186644773,
932
+ "eval_f1_weighted": 0.820184790334044,
933
+ "eval_loss": 0.5200427770614624,
934
+ "eval_roc_auc": 0.8998004509846013,
935
+ "eval_runtime": 1.2187,
936
+ "eval_samples_per_second": 54.975,
937
+ "eval_steps_per_second": 4.103,
938
+ "step": 124
939
+ },
940
+ {
941
+ "epoch": 6.3,
942
+ "eval_accuracy": 0.7910447761194029,
943
+ "eval_f1": 0.6517503805175039,
944
+ "eval_f1_weighted": 0.7759694677298439,
945
+ "eval_loss": 0.633881151676178,
946
+ "eval_roc_auc": 0.9095164374087981,
947
+ "eval_runtime": 1.2183,
948
+ "eval_samples_per_second": 54.996,
949
+ "eval_steps_per_second": 4.104,
950
+ "step": 126
951
+ },
952
+ {
953
+ "epoch": 6.4,
954
+ "learning_rate": 3.4000000000000007e-05,
955
+ "loss": 0.2178,
956
+ "step": 128
957
+ },
958
+ {
959
+ "epoch": 6.4,
960
+ "eval_accuracy": 0.7910447761194029,
961
+ "eval_f1": 0.5480140774258421,
962
+ "eval_f1_weighted": 0.7605862092253663,
963
+ "eval_loss": 0.7460798025131226,
964
+ "eval_roc_auc": 0.9236619354087335,
965
+ "eval_runtime": 1.2193,
966
+ "eval_samples_per_second": 54.952,
967
+ "eval_steps_per_second": 4.101,
968
+ "step": 128
969
+ },
970
+ {
971
+ "epoch": 6.5,
972
+ "eval_accuracy": 0.7313432835820896,
973
+ "eval_f1": 0.5012973533990659,
974
+ "eval_f1_weighted": 0.6975346412721033,
975
+ "eval_loss": 0.8184891939163208,
976
+ "eval_roc_auc": 0.9135357751398279,
977
+ "eval_runtime": 1.2209,
978
+ "eval_samples_per_second": 54.878,
979
+ "eval_steps_per_second": 4.095,
980
+ "step": 130
981
+ },
982
+ {
983
+ "epoch": 6.6,
984
+ "learning_rate": 3.35e-05,
985
+ "loss": 0.481,
986
+ "step": 132
987
+ },
988
+ {
989
+ "epoch": 6.6,
990
+ "eval_accuracy": 0.7761194029850746,
991
+ "eval_f1": 0.5367088607594936,
992
+ "eval_f1_weighted": 0.7452862270923862,
993
+ "eval_loss": 0.7749145030975342,
994
+ "eval_roc_auc": 0.9241951537539688,
995
+ "eval_runtime": 1.2188,
996
+ "eval_samples_per_second": 54.972,
997
+ "eval_steps_per_second": 4.102,
998
+ "step": 132
999
+ },
1000
+ {
1001
+ "epoch": 6.7,
1002
+ "eval_accuracy": 0.7910447761194029,
1003
+ "eval_f1": 0.5480140774258421,
1004
+ "eval_f1_weighted": 0.7605862092253663,
1005
+ "eval_loss": 0.70408034324646,
1006
+ "eval_roc_auc": 0.9353670079123542,
1007
+ "eval_runtime": 1.2137,
1008
+ "eval_samples_per_second": 55.205,
1009
+ "eval_steps_per_second": 4.12,
1010
+ "step": 134
1011
+ },
1012
+ {
1013
+ "epoch": 6.8,
1014
+ "learning_rate": 3.3e-05,
1015
+ "loss": 0.357,
1016
+ "step": 136
1017
+ },
1018
+ {
1019
+ "epoch": 6.8,
1020
+ "eval_accuracy": 0.7761194029850746,
1021
+ "eval_f1": 0.5418752085418752,
1022
+ "eval_f1_weighted": 0.7520356177072596,
1023
+ "eval_loss": 0.5984435677528381,
1024
+ "eval_roc_auc": 0.9422144107122601,
1025
+ "eval_runtime": 1.2172,
1026
+ "eval_samples_per_second": 55.046,
1027
+ "eval_steps_per_second": 4.108,
1028
+ "step": 136
1029
+ },
1030
+ {
1031
+ "epoch": 6.9,
1032
+ "eval_accuracy": 0.7910447761194029,
1033
+ "eval_f1": 0.5568922305764411,
1034
+ "eval_f1_weighted": 0.7725956682751655,
1035
+ "eval_loss": 0.6042739152908325,
1036
+ "eval_roc_auc": 0.8886977232627885,
1037
+ "eval_runtime": 1.2199,
1038
+ "eval_samples_per_second": 54.923,
1039
+ "eval_steps_per_second": 4.099,
1040
+ "step": 138
1041
+ },
1042
+ {
1043
+ "epoch": 7.0,
1044
+ "learning_rate": 3.2500000000000004e-05,
1045
+ "loss": 0.2442,
1046
+ "step": 140
1047
+ },
1048
+ {
1049
+ "epoch": 7.0,
1050
+ "eval_accuracy": 0.7761194029850746,
1051
+ "eval_f1": 0.6232240437158469,
1052
+ "eval_f1_weighted": 0.7674700269146073,
1053
+ "eval_loss": 0.6625877618789673,
1054
+ "eval_roc_auc": 0.885796771407202,
1055
+ "eval_runtime": 1.2178,
1056
+ "eval_samples_per_second": 55.016,
1057
+ "eval_steps_per_second": 4.106,
1058
+ "step": 140
1059
+ },
1060
+ {
1061
+ "epoch": 7.1,
1062
+ "eval_accuracy": 0.746268656716418,
1063
+ "eval_f1": 0.601851851851852,
1064
+ "eval_f1_weighted": 0.7374437337123906,
1065
+ "eval_loss": 0.7249980568885803,
1066
+ "eval_roc_auc": 0.8836999918218473,
1067
+ "eval_runtime": 1.2196,
1068
+ "eval_samples_per_second": 54.937,
1069
+ "eval_steps_per_second": 4.1,
1070
+ "step": 142
1071
+ },
1072
+ {
1073
+ "epoch": 7.2,
1074
+ "learning_rate": 3.2000000000000005e-05,
1075
+ "loss": 0.2895,
1076
+ "step": 144
1077
+ },
1078
+ {
1079
+ "epoch": 7.2,
1080
+ "eval_accuracy": 0.7611940298507462,
1081
+ "eval_f1": 0.6125672043010751,
1082
+ "eval_f1_weighted": 0.7525126384207992,
1083
+ "eval_loss": 0.7177485227584839,
1084
+ "eval_roc_auc": 0.8988318738409138,
1085
+ "eval_runtime": 1.219,
1086
+ "eval_samples_per_second": 54.963,
1087
+ "eval_steps_per_second": 4.102,
1088
+ "step": 144
1089
+ },
1090
+ {
1091
+ "epoch": 7.3,
1092
+ "eval_accuracy": 0.8059701492537313,
1093
+ "eval_f1": 0.6442283244342741,
1094
+ "eval_f1_weighted": 0.7971669114382322,
1095
+ "eval_loss": 0.6158971786499023,
1096
+ "eval_roc_auc": 0.9257433466065693,
1097
+ "eval_runtime": 1.2179,
1098
+ "eval_samples_per_second": 55.013,
1099
+ "eval_steps_per_second": 4.105,
1100
+ "step": 146
1101
+ },
1102
+ {
1103
+ "epoch": 7.4,
1104
+ "learning_rate": 3.15e-05,
1105
+ "loss": 0.1247,
1106
+ "step": 148
1107
+ },
1108
+ {
1109
+ "epoch": 7.4,
1110
+ "eval_accuracy": 0.7761194029850746,
1111
+ "eval_f1": 0.6135265700483091,
1112
+ "eval_f1_weighted": 0.7752109020116807,
1113
+ "eval_loss": 0.5960801243782043,
1114
+ "eval_roc_auc": 0.9299025860599263,
1115
+ "eval_runtime": 1.2186,
1116
+ "eval_samples_per_second": 54.98,
1117
+ "eval_steps_per_second": 4.103,
1118
+ "step": 148
1119
+ },
1120
+ {
1121
+ "epoch": 7.5,
1122
+ "eval_accuracy": 0.7910447761194029,
1123
+ "eval_f1": 0.6803418803418803,
1124
+ "eval_f1_weighted": 0.7898966704936854,
1125
+ "eval_loss": 0.6297043561935425,
1126
+ "eval_roc_auc": 0.928043745151505,
1127
+ "eval_runtime": 1.2207,
1128
+ "eval_samples_per_second": 54.884,
1129
+ "eval_steps_per_second": 4.096,
1130
+ "step": 150
1131
+ },
1132
+ {
1133
+ "epoch": 7.6,
1134
+ "learning_rate": 3.1e-05,
1135
+ "loss": 0.1401,
1136
+ "step": 152
1137
+ },
1138
+ {
1139
+ "epoch": 7.6,
1140
+ "eval_accuracy": 0.7761194029850746,
1141
+ "eval_f1": 0.6214568794692857,
1142
+ "eval_f1_weighted": 0.7664838768164862,
1143
+ "eval_loss": 0.7288922667503357,
1144
+ "eval_roc_auc": 0.9169795844990506,
1145
+ "eval_runtime": 1.2177,
1146
+ "eval_samples_per_second": 55.02,
1147
+ "eval_steps_per_second": 4.106,
1148
+ "step": 152
1149
+ },
1150
+ {
1151
+ "epoch": 7.7,
1152
+ "eval_accuracy": 0.7611940298507462,
1153
+ "eval_f1": 0.5344212136664966,
1154
+ "eval_f1_weighted": 0.7423565497347531,
1155
+ "eval_loss": 0.7684075832366943,
1156
+ "eval_roc_auc": 0.9071037779761267,
1157
+ "eval_runtime": 1.2191,
1158
+ "eval_samples_per_second": 54.959,
1159
+ "eval_steps_per_second": 4.101,
1160
+ "step": 154
1161
+ },
1162
+ {
1163
+ "epoch": 7.8,
1164
+ "learning_rate": 3.05e-05,
1165
+ "loss": 0.1425,
1166
+ "step": 156
1167
+ },
1168
+ {
1169
+ "epoch": 7.8,
1170
+ "eval_accuracy": 0.7611940298507462,
1171
+ "eval_f1": 0.5344212136664966,
1172
+ "eval_f1_weighted": 0.7423565497347531,
1173
+ "eval_loss": 0.7739881873130798,
1174
+ "eval_roc_auc": 0.9037702780185827,
1175
+ "eval_runtime": 1.2219,
1176
+ "eval_samples_per_second": 54.832,
1177
+ "eval_steps_per_second": 4.092,
1178
+ "step": 156
1179
+ },
1180
+ {
1181
+ "epoch": 7.9,
1182
+ "eval_accuracy": 0.8059701492537313,
1183
+ "eval_f1": 0.6499480563407505,
1184
+ "eval_f1_weighted": 0.792322362962314,
1185
+ "eval_loss": 0.7463679909706116,
1186
+ "eval_roc_auc": 0.9108798026167514,
1187
+ "eval_runtime": 1.2161,
1188
+ "eval_samples_per_second": 55.094,
1189
+ "eval_steps_per_second": 4.111,
1190
+ "step": 158
1191
+ },
1192
+ {
1193
+ "epoch": 8.0,
1194
+ "learning_rate": 3e-05,
1195
+ "loss": 0.0541,
1196
+ "step": 160
1197
+ },
1198
+ {
1199
+ "epoch": 8.0,
1200
+ "eval_accuracy": 0.7910447761194029,
1201
+ "eval_f1": 0.6396825396825397,
1202
+ "eval_f1_weighted": 0.7780383795309169,
1203
+ "eval_loss": 0.6964292526245117,
1204
+ "eval_roc_auc": 0.924278504123671,
1205
+ "eval_runtime": 1.2218,
1206
+ "eval_samples_per_second": 54.838,
1207
+ "eval_steps_per_second": 4.092,
1208
+ "step": 160
1209
+ },
1210
+ {
1211
+ "epoch": 8.1,
1212
+ "eval_accuracy": 0.7910447761194029,
1213
+ "eval_f1": 0.6396825396825397,
1214
+ "eval_f1_weighted": 0.7780383795309169,
1215
+ "eval_loss": 0.6750349402427673,
1216
+ "eval_roc_auc": 0.9306260238562039,
1217
+ "eval_runtime": 1.2115,
1218
+ "eval_samples_per_second": 55.304,
1219
+ "eval_steps_per_second": 4.127,
1220
+ "step": 162
1221
+ },
1222
+ {
1223
+ "epoch": 8.2,
1224
+ "learning_rate": 2.95e-05,
1225
+ "loss": 0.0735,
1226
+ "step": 164
1227
+ },
1228
+ {
1229
+ "epoch": 8.2,
1230
+ "eval_accuracy": 0.7910447761194029,
1231
+ "eval_f1": 0.6396825396825397,
1232
+ "eval_f1_weighted": 0.7780383795309169,
1233
+ "eval_loss": 0.6345735788345337,
1234
+ "eval_roc_auc": 0.9307606763941116,
1235
+ "eval_runtime": 1.2207,
1236
+ "eval_samples_per_second": 54.885,
1237
+ "eval_steps_per_second": 4.096,
1238
+ "step": 164
1239
+ },
1240
+ {
1241
+ "epoch": 8.3,
1242
+ "eval_accuracy": 0.8059701492537313,
1243
+ "eval_f1": 0.6507377598926894,
1244
+ "eval_f1_weighted": 0.7930943872188354,
1245
+ "eval_loss": 0.6417430639266968,
1246
+ "eval_roc_auc": 0.9207915603132855,
1247
+ "eval_runtime": 1.2167,
1248
+ "eval_samples_per_second": 55.069,
1249
+ "eval_steps_per_second": 4.11,
1250
+ "step": 166
1251
+ },
1252
+ {
1253
+ "epoch": 8.4,
1254
+ "learning_rate": 2.9e-05,
1255
+ "loss": 0.0701,
1256
+ "step": 168
1257
+ },
1258
+ {
1259
+ "epoch": 8.4,
1260
+ "eval_accuracy": 0.8208955223880597,
1261
+ "eval_f1": 0.6735030063421464,
1262
+ "eval_f1_weighted": 0.8055976603269788,
1263
+ "eval_loss": 0.6598219871520996,
1264
+ "eval_roc_auc": 0.9178574505876599,
1265
+ "eval_runtime": 1.2178,
1266
+ "eval_samples_per_second": 55.017,
1267
+ "eval_steps_per_second": 4.106,
1268
+ "step": 168
1269
+ },
1270
+ {
1271
+ "epoch": 8.5,
1272
+ "eval_accuracy": 0.8208955223880597,
1273
+ "eval_f1": 0.6735030063421464,
1274
+ "eval_f1_weighted": 0.8055976603269788,
1275
+ "eval_loss": 0.674333393573761,
1276
+ "eval_roc_auc": 0.9147662966739546,
1277
+ "eval_runtime": 1.2139,
1278
+ "eval_samples_per_second": 55.194,
1279
+ "eval_steps_per_second": 4.119,
1280
+ "step": 170
1281
+ },
1282
+ {
1283
+ "epoch": 8.6,
1284
+ "learning_rate": 2.8499999999999998e-05,
1285
+ "loss": 0.0415,
1286
+ "step": 172
1287
+ },
1288
+ {
1289
+ "epoch": 8.6,
1290
+ "eval_accuracy": 0.8208955223880597,
1291
+ "eval_f1": 0.6735030063421464,
1292
+ "eval_f1_weighted": 0.8055976603269788,
1293
+ "eval_loss": 0.6726630330085754,
1294
+ "eval_roc_auc": 0.9180612295621405,
1295
+ "eval_runtime": 1.2166,
1296
+ "eval_samples_per_second": 55.074,
1297
+ "eval_steps_per_second": 4.11,
1298
+ "step": 172
1299
+ },
1300
+ {
1301
+ "epoch": 8.7,
1302
+ "eval_accuracy": 0.835820895522388,
1303
+ "eval_f1": 0.6841817186644773,
1304
+ "eval_f1_weighted": 0.820184790334044,
1305
+ "eval_loss": 0.6476911902427673,
1306
+ "eval_roc_auc": 0.9287753506648254,
1307
+ "eval_runtime": 1.2178,
1308
+ "eval_samples_per_second": 55.016,
1309
+ "eval_steps_per_second": 4.106,
1310
+ "step": 174
1311
+ },
1312
+ {
1313
+ "epoch": 8.8,
1314
+ "learning_rate": 2.8000000000000003e-05,
1315
+ "loss": 0.1207,
1316
+ "step": 176
1317
+ },
1318
+ {
1319
+ "epoch": 8.8,
1320
+ "eval_accuracy": 0.8507462686567164,
1321
+ "eval_f1": 0.6947514943093426,
1322
+ "eval_f1_weighted": 0.8346513563354928,
1323
+ "eval_loss": 0.6322030425071716,
1324
+ "eval_roc_auc": 0.9343940407946855,
1325
+ "eval_runtime": 1.2175,
1326
+ "eval_samples_per_second": 55.032,
1327
+ "eval_steps_per_second": 4.107,
1328
+ "step": 176
1329
+ },
1330
+ {
1331
+ "epoch": 8.9,
1332
+ "eval_accuracy": 0.8507462686567164,
1333
+ "eval_f1": 0.6947514943093426,
1334
+ "eval_f1_weighted": 0.8346513563354928,
1335
+ "eval_loss": 0.631219744682312,
1336
+ "eval_roc_auc": 0.9355877867306113,
1337
+ "eval_runtime": 1.2171,
1338
+ "eval_samples_per_second": 55.047,
1339
+ "eval_steps_per_second": 4.108,
1340
+ "step": 178
1341
+ },
1342
+ {
1343
+ "epoch": 9.0,
1344
+ "learning_rate": 2.7500000000000004e-05,
1345
+ "loss": 0.0677,
1346
+ "step": 180
1347
+ },
1348
+ {
1349
+ "epoch": 9.0,
1350
+ "eval_accuracy": 0.835820895522388,
1351
+ "eval_f1": 0.6843137254901962,
1352
+ "eval_f1_weighted": 0.8201053555750658,
1353
+ "eval_loss": 0.6727584600448608,
1354
+ "eval_roc_auc": 0.9294350538309254,
1355
+ "eval_runtime": 1.213,
1356
+ "eval_samples_per_second": 55.237,
1357
+ "eval_steps_per_second": 4.122,
1358
+ "step": 180
1359
+ },
1360
+ {
1361
+ "epoch": 9.1,
1362
+ "eval_accuracy": 0.8059701492537313,
1363
+ "eval_f1": 0.6633986928104575,
1364
+ "eval_f1_weighted": 0.7911911033069944,
1365
+ "eval_loss": 0.7204322218894958,
1366
+ "eval_roc_auc": 0.9322838617007921,
1367
+ "eval_runtime": 1.2192,
1368
+ "eval_samples_per_second": 54.955,
1369
+ "eval_steps_per_second": 4.101,
1370
+ "step": 182
1371
+ },
1372
+ {
1373
+ "epoch": 9.2,
1374
+ "learning_rate": 2.7000000000000002e-05,
1375
+ "loss": 0.0334,
1376
+ "step": 184
1377
+ },
1378
+ {
1379
+ "epoch": 9.2,
1380
+ "eval_accuracy": 0.8059701492537313,
1381
+ "eval_f1": 0.6633986928104575,
1382
+ "eval_f1_weighted": 0.7911911033069944,
1383
+ "eval_loss": 0.6748175621032715,
1384
+ "eval_roc_auc": 0.9319867731684095,
1385
+ "eval_runtime": 1.2166,
1386
+ "eval_samples_per_second": 55.073,
1387
+ "eval_steps_per_second": 4.11,
1388
+ "step": 184
1389
+ },
1390
+ {
1391
+ "epoch": 9.3,
1392
+ "eval_accuracy": 0.8059701492537313,
1393
+ "eval_f1": 0.6631636562671046,
1394
+ "eval_f1_weighted": 0.7911869225302061,
1395
+ "eval_loss": 0.6425347924232483,
1396
+ "eval_roc_auc": 0.9327048191582917,
1397
+ "eval_runtime": 1.2192,
1398
+ "eval_samples_per_second": 54.952,
1399
+ "eval_steps_per_second": 4.101,
1400
+ "step": 186
1401
+ },
1402
+ {
1403
+ "epoch": 9.4,
1404
+ "learning_rate": 2.6500000000000004e-05,
1405
+ "loss": 0.0184,
1406
+ "step": 188
1407
+ },
1408
+ {
1409
+ "epoch": 9.4,
1410
+ "eval_accuracy": 0.8208955223880597,
1411
+ "eval_f1": 0.7118127444214402,
1412
+ "eval_f1_weighted": 0.8170045219039379,
1413
+ "eval_loss": 0.6353668570518494,
1414
+ "eval_roc_auc": 0.9364646683797305,
1415
+ "eval_runtime": 1.2147,
1416
+ "eval_samples_per_second": 55.155,
1417
+ "eval_steps_per_second": 4.116,
1418
+ "step": 188
1419
+ },
1420
+ {
1421
+ "epoch": 9.5,
1422
+ "eval_accuracy": 0.835820895522388,
1423
+ "eval_f1": 0.7686429512516469,
1424
+ "eval_f1_weighted": 0.8350775765441567,
1425
+ "eval_loss": 0.6435410380363464,
1426
+ "eval_roc_auc": 0.9426883191871465,
1427
+ "eval_runtime": 1.2183,
1428
+ "eval_samples_per_second": 54.995,
1429
+ "eval_steps_per_second": 4.104,
1430
+ "step": 190
1431
+ }
1432
+ ],
1433
+ "max_steps": 400,
1434
+ "num_train_epochs": 20,
1435
+ "total_flos": 769080520018944.0,
1436
+ "trial_name": null,
1437
+ "trial_params": null
1438
+ }