kotstantinovskii commited on
Commit
28237f0
·
1 Parent(s): 5cdc0fd

Upload checkpoint-11500/trainer_state.json

Browse files
Files changed (1) hide show
  1. checkpoint-11500/trainer_state.json +752 -0
checkpoint-11500/trainer_state.json ADDED
@@ -0,0 +1,752 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 1.122239351272583,
3
+ "best_model_checkpoint": "./res_1/checkpoint-8500",
4
+ "epoch": 3.72168284789644,
5
+ "global_step": 11500,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.08,
12
+ "learning_rate": 2.5e-05,
13
+ "loss": 4.4764,
14
+ "step": 250
15
+ },
16
+ {
17
+ "epoch": 0.08,
18
+ "eval_accuracy": 0.2770019218449712,
19
+ "eval_f1_score": 0.15899206409534755,
20
+ "eval_loss": 3.3759119510650635,
21
+ "eval_runtime": 78.6208,
22
+ "eval_samples_per_second": 198.548,
23
+ "eval_steps_per_second": 2.073,
24
+ "step": 250
25
+ },
26
+ {
27
+ "epoch": 0.16,
28
+ "learning_rate": 5e-05,
29
+ "loss": 2.7621,
30
+ "step": 500
31
+ },
32
+ {
33
+ "epoch": 0.16,
34
+ "eval_accuracy": 0.45624599615631006,
35
+ "eval_f1_score": 0.37938588394970946,
36
+ "eval_loss": 2.22213077545166,
37
+ "eval_runtime": 79.1544,
38
+ "eval_samples_per_second": 197.21,
39
+ "eval_steps_per_second": 2.059,
40
+ "step": 500
41
+ },
42
+ {
43
+ "epoch": 0.24,
44
+ "learning_rate": 4.89460370994941e-05,
45
+ "loss": 2.0369,
46
+ "step": 750
47
+ },
48
+ {
49
+ "epoch": 0.24,
50
+ "eval_accuracy": 0.5180653427290198,
51
+ "eval_f1_score": 0.4651383493082148,
52
+ "eval_loss": 1.8033452033996582,
53
+ "eval_runtime": 78.4993,
54
+ "eval_samples_per_second": 198.855,
55
+ "eval_steps_per_second": 2.076,
56
+ "step": 750
57
+ },
58
+ {
59
+ "epoch": 0.32,
60
+ "learning_rate": 4.7892074198988196e-05,
61
+ "loss": 1.7846,
62
+ "step": 1000
63
+ },
64
+ {
65
+ "epoch": 0.32,
66
+ "eval_accuracy": 0.5547085201793722,
67
+ "eval_f1_score": 0.5145917984904441,
68
+ "eval_loss": 1.6128593683242798,
69
+ "eval_runtime": 79.1904,
70
+ "eval_samples_per_second": 197.12,
71
+ "eval_steps_per_second": 2.058,
72
+ "step": 1000
73
+ },
74
+ {
75
+ "epoch": 0.4,
76
+ "learning_rate": 4.683811129848229e-05,
77
+ "loss": 1.5885,
78
+ "step": 1250
79
+ },
80
+ {
81
+ "epoch": 0.4,
82
+ "eval_accuracy": 0.5733504163997437,
83
+ "eval_f1_score": 0.5403077333842975,
84
+ "eval_loss": 1.5020724534988403,
85
+ "eval_runtime": 79.2228,
86
+ "eval_samples_per_second": 197.039,
87
+ "eval_steps_per_second": 2.057,
88
+ "step": 1250
89
+ },
90
+ {
91
+ "epoch": 0.49,
92
+ "learning_rate": 4.5784148397976396e-05,
93
+ "loss": 1.5474,
94
+ "step": 1500
95
+ },
96
+ {
97
+ "epoch": 0.49,
98
+ "eval_accuracy": 0.5885970531710442,
99
+ "eval_f1_score": 0.5598433530383174,
100
+ "eval_loss": 1.4353328943252563,
101
+ "eval_runtime": 79.3289,
102
+ "eval_samples_per_second": 196.776,
103
+ "eval_steps_per_second": 2.055,
104
+ "step": 1500
105
+ },
106
+ {
107
+ "epoch": 0.57,
108
+ "learning_rate": 4.4730185497470486e-05,
109
+ "loss": 1.4754,
110
+ "step": 1750
111
+ },
112
+ {
113
+ "epoch": 0.57,
114
+ "eval_accuracy": 0.5910954516335682,
115
+ "eval_f1_score": 0.567613249026739,
116
+ "eval_loss": 1.4085466861724854,
117
+ "eval_runtime": 79.2487,
118
+ "eval_samples_per_second": 196.975,
119
+ "eval_steps_per_second": 2.057,
120
+ "step": 1750
121
+ },
122
+ {
123
+ "epoch": 0.65,
124
+ "learning_rate": 4.367622259696459e-05,
125
+ "loss": 1.3951,
126
+ "step": 2000
127
+ },
128
+ {
129
+ "epoch": 0.65,
130
+ "eval_accuracy": 0.6080717488789238,
131
+ "eval_f1_score": 0.5881168924607102,
132
+ "eval_loss": 1.3425586223602295,
133
+ "eval_runtime": 79.2258,
134
+ "eval_samples_per_second": 197.032,
135
+ "eval_steps_per_second": 2.057,
136
+ "step": 2000
137
+ },
138
+ {
139
+ "epoch": 0.73,
140
+ "learning_rate": 4.262225969645869e-05,
141
+ "loss": 1.3711,
142
+ "step": 2250
143
+ },
144
+ {
145
+ "epoch": 0.73,
146
+ "eval_accuracy": 0.6153747597693786,
147
+ "eval_f1_score": 0.5932326664510665,
148
+ "eval_loss": 1.3140783309936523,
149
+ "eval_runtime": 79.1766,
150
+ "eval_samples_per_second": 197.154,
151
+ "eval_steps_per_second": 2.059,
152
+ "step": 2250
153
+ },
154
+ {
155
+ "epoch": 0.81,
156
+ "learning_rate": 4.1568296795952783e-05,
157
+ "loss": 1.3487,
158
+ "step": 2500
159
+ },
160
+ {
161
+ "epoch": 0.81,
162
+ "eval_accuracy": 0.6163997437540039,
163
+ "eval_f1_score": 0.5956180167874238,
164
+ "eval_loss": 1.3029619455337524,
165
+ "eval_runtime": 79.2987,
166
+ "eval_samples_per_second": 196.851,
167
+ "eval_steps_per_second": 2.056,
168
+ "step": 2500
169
+ },
170
+ {
171
+ "epoch": 0.89,
172
+ "learning_rate": 4.051433389544688e-05,
173
+ "loss": 1.325,
174
+ "step": 2750
175
+ },
176
+ {
177
+ "epoch": 0.89,
178
+ "eval_accuracy": 0.6240230621396541,
179
+ "eval_f1_score": 0.6027158431466506,
180
+ "eval_loss": 1.2634259462356567,
181
+ "eval_runtime": 79.2608,
182
+ "eval_samples_per_second": 196.945,
183
+ "eval_steps_per_second": 2.057,
184
+ "step": 2750
185
+ },
186
+ {
187
+ "epoch": 0.97,
188
+ "learning_rate": 3.9460370994940984e-05,
189
+ "loss": 1.307,
190
+ "step": 3000
191
+ },
192
+ {
193
+ "epoch": 0.97,
194
+ "eval_accuracy": 0.6235105701473415,
195
+ "eval_f1_score": 0.6034162405543351,
196
+ "eval_loss": 1.2556581497192383,
197
+ "eval_runtime": 79.2153,
198
+ "eval_samples_per_second": 197.058,
199
+ "eval_steps_per_second": 2.058,
200
+ "step": 3000
201
+ },
202
+ {
203
+ "epoch": 1.05,
204
+ "learning_rate": 3.8406408094435074e-05,
205
+ "loss": 1.2019,
206
+ "step": 3250
207
+ },
208
+ {
209
+ "epoch": 1.05,
210
+ "eval_accuracy": 0.6322869955156951,
211
+ "eval_f1_score": 0.6147432260064974,
212
+ "eval_loss": 1.227844476699829,
213
+ "eval_runtime": 79.4547,
214
+ "eval_samples_per_second": 196.464,
215
+ "eval_steps_per_second": 2.051,
216
+ "step": 3250
217
+ },
218
+ {
219
+ "epoch": 1.13,
220
+ "learning_rate": 3.735244519392918e-05,
221
+ "loss": 1.1366,
222
+ "step": 3500
223
+ },
224
+ {
225
+ "epoch": 1.13,
226
+ "eval_accuracy": 0.6326713645099296,
227
+ "eval_f1_score": 0.6163567005428549,
228
+ "eval_loss": 1.2262649536132812,
229
+ "eval_runtime": 79.2235,
230
+ "eval_samples_per_second": 197.037,
231
+ "eval_steps_per_second": 2.057,
232
+ "step": 3500
233
+ },
234
+ {
235
+ "epoch": 1.21,
236
+ "learning_rate": 3.6298482293423274e-05,
237
+ "loss": 1.1643,
238
+ "step": 3750
239
+ },
240
+ {
241
+ "epoch": 1.21,
242
+ "eval_accuracy": 0.6368994234465086,
243
+ "eval_f1_score": 0.6215204096974251,
244
+ "eval_loss": 1.2120453119277954,
245
+ "eval_runtime": 79.1999,
246
+ "eval_samples_per_second": 197.096,
247
+ "eval_steps_per_second": 2.058,
248
+ "step": 3750
249
+ },
250
+ {
251
+ "epoch": 1.29,
252
+ "learning_rate": 3.524451939291737e-05,
253
+ "loss": 1.1226,
254
+ "step": 4000
255
+ },
256
+ {
257
+ "epoch": 1.29,
258
+ "eval_accuracy": 0.6377322229340167,
259
+ "eval_f1_score": 0.6225832875038563,
260
+ "eval_loss": 1.2013684511184692,
261
+ "eval_runtime": 79.3501,
262
+ "eval_samples_per_second": 196.723,
263
+ "eval_steps_per_second": 2.054,
264
+ "step": 4000
265
+ },
266
+ {
267
+ "epoch": 1.38,
268
+ "learning_rate": 3.419055649241147e-05,
269
+ "loss": 1.1193,
270
+ "step": 4250
271
+ },
272
+ {
273
+ "epoch": 1.38,
274
+ "eval_accuracy": 0.6332479180012812,
275
+ "eval_f1_score": 0.6195484695149066,
276
+ "eval_loss": 1.2123792171478271,
277
+ "eval_runtime": 79.3987,
278
+ "eval_samples_per_second": 196.603,
279
+ "eval_steps_per_second": 2.053,
280
+ "step": 4250
281
+ },
282
+ {
283
+ "epoch": 1.46,
284
+ "learning_rate": 3.313659359190557e-05,
285
+ "loss": 1.1093,
286
+ "step": 4500
287
+ },
288
+ {
289
+ "epoch": 1.46,
290
+ "eval_accuracy": 0.6402306213965406,
291
+ "eval_f1_score": 0.6254312551097587,
292
+ "eval_loss": 1.1897130012512207,
293
+ "eval_runtime": 79.5616,
294
+ "eval_samples_per_second": 196.2,
295
+ "eval_steps_per_second": 2.049,
296
+ "step": 4500
297
+ },
298
+ {
299
+ "epoch": 1.54,
300
+ "learning_rate": 3.208263069139966e-05,
301
+ "loss": 1.0848,
302
+ "step": 4750
303
+ },
304
+ {
305
+ "epoch": 1.54,
306
+ "eval_accuracy": 0.6440102498398462,
307
+ "eval_f1_score": 0.6291617598687145,
308
+ "eval_loss": 1.178646445274353,
309
+ "eval_runtime": 79.2558,
310
+ "eval_samples_per_second": 196.957,
311
+ "eval_steps_per_second": 2.057,
312
+ "step": 4750
313
+ },
314
+ {
315
+ "epoch": 1.62,
316
+ "learning_rate": 3.1028667790893765e-05,
317
+ "loss": 1.1105,
318
+ "step": 5000
319
+ },
320
+ {
321
+ "epoch": 1.62,
322
+ "eval_accuracy": 0.649583600256246,
323
+ "eval_f1_score": 0.6335161931693469,
324
+ "eval_loss": 1.1682264804840088,
325
+ "eval_runtime": 79.3313,
326
+ "eval_samples_per_second": 196.77,
327
+ "eval_steps_per_second": 2.055,
328
+ "step": 5000
329
+ },
330
+ {
331
+ "epoch": 1.7,
332
+ "learning_rate": 2.997470489038786e-05,
333
+ "loss": 1.1014,
334
+ "step": 5250
335
+ },
336
+ {
337
+ "epoch": 1.7,
338
+ "eval_accuracy": 0.6516335682254965,
339
+ "eval_f1_score": 0.6377925627710681,
340
+ "eval_loss": 1.156283974647522,
341
+ "eval_runtime": 79.534,
342
+ "eval_samples_per_second": 196.268,
343
+ "eval_steps_per_second": 2.049,
344
+ "step": 5250
345
+ },
346
+ {
347
+ "epoch": 1.78,
348
+ "learning_rate": 2.8920741989881955e-05,
349
+ "loss": 1.0828,
350
+ "step": 5500
351
+ },
352
+ {
353
+ "epoch": 1.78,
354
+ "eval_accuracy": 0.6459961563100577,
355
+ "eval_f1_score": 0.6313286940706383,
356
+ "eval_loss": 1.1570398807525635,
357
+ "eval_runtime": 79.5476,
358
+ "eval_samples_per_second": 196.235,
359
+ "eval_steps_per_second": 2.049,
360
+ "step": 5500
361
+ },
362
+ {
363
+ "epoch": 1.86,
364
+ "learning_rate": 2.7866779089376055e-05,
365
+ "loss": 1.0803,
366
+ "step": 5750
367
+ },
368
+ {
369
+ "epoch": 1.86,
370
+ "eval_accuracy": 0.6530429212043561,
371
+ "eval_f1_score": 0.6404845558613215,
372
+ "eval_loss": 1.146980881690979,
373
+ "eval_runtime": 79.2229,
374
+ "eval_samples_per_second": 197.039,
375
+ "eval_steps_per_second": 2.057,
376
+ "step": 5750
377
+ },
378
+ {
379
+ "epoch": 1.94,
380
+ "learning_rate": 2.6812816188870155e-05,
381
+ "loss": 1.0799,
382
+ "step": 6000
383
+ },
384
+ {
385
+ "epoch": 1.94,
386
+ "eval_accuracy": 0.6516976297245356,
387
+ "eval_f1_score": 0.638943219006341,
388
+ "eval_loss": 1.1435405015945435,
389
+ "eval_runtime": 79.322,
390
+ "eval_samples_per_second": 196.793,
391
+ "eval_steps_per_second": 2.055,
392
+ "step": 6000
393
+ },
394
+ {
395
+ "epoch": 2.02,
396
+ "learning_rate": 2.575885328836425e-05,
397
+ "loss": 1.0172,
398
+ "step": 6250
399
+ },
400
+ {
401
+ "epoch": 2.02,
402
+ "eval_accuracy": 0.647982062780269,
403
+ "eval_f1_score": 0.6342691459320453,
404
+ "eval_loss": 1.157272219657898,
405
+ "eval_runtime": 79.3295,
406
+ "eval_samples_per_second": 196.774,
407
+ "eval_steps_per_second": 2.055,
408
+ "step": 6250
409
+ },
410
+ {
411
+ "epoch": 2.1,
412
+ "learning_rate": 2.470489038785835e-05,
413
+ "loss": 0.8986,
414
+ "step": 6500
415
+ },
416
+ {
417
+ "epoch": 2.1,
418
+ "eval_accuracy": 0.6488789237668161,
419
+ "eval_f1_score": 0.6382130933006974,
420
+ "eval_loss": 1.1634544134140015,
421
+ "eval_runtime": 79.298,
422
+ "eval_samples_per_second": 196.852,
423
+ "eval_steps_per_second": 2.056,
424
+ "step": 6500
425
+ },
426
+ {
427
+ "epoch": 2.18,
428
+ "learning_rate": 2.3650927487352446e-05,
429
+ "loss": 0.8871,
430
+ "step": 6750
431
+ },
432
+ {
433
+ "epoch": 2.18,
434
+ "eval_accuracy": 0.6540679051889814,
435
+ "eval_f1_score": 0.6404856756767366,
436
+ "eval_loss": 1.1588941812515259,
437
+ "eval_runtime": 79.2578,
438
+ "eval_samples_per_second": 196.952,
439
+ "eval_steps_per_second": 2.057,
440
+ "step": 6750
441
+ },
442
+ {
443
+ "epoch": 2.27,
444
+ "learning_rate": 2.2596964586846546e-05,
445
+ "loss": 0.9066,
446
+ "step": 7000
447
+ },
448
+ {
449
+ "epoch": 2.27,
450
+ "eval_accuracy": 0.6515695067264574,
451
+ "eval_f1_score": 0.6402759354718203,
452
+ "eval_loss": 1.146422266960144,
453
+ "eval_runtime": 79.5354,
454
+ "eval_samples_per_second": 196.265,
455
+ "eval_steps_per_second": 2.049,
456
+ "step": 7000
457
+ },
458
+ {
459
+ "epoch": 2.35,
460
+ "learning_rate": 2.1543001686340643e-05,
461
+ "loss": 0.897,
462
+ "step": 7250
463
+ },
464
+ {
465
+ "epoch": 2.35,
466
+ "eval_accuracy": 0.650352338244715,
467
+ "eval_f1_score": 0.6389635569093923,
468
+ "eval_loss": 1.1561025381088257,
469
+ "eval_runtime": 79.2894,
470
+ "eval_samples_per_second": 196.874,
471
+ "eval_steps_per_second": 2.056,
472
+ "step": 7250
473
+ },
474
+ {
475
+ "epoch": 2.43,
476
+ "learning_rate": 2.048903878583474e-05,
477
+ "loss": 0.8988,
478
+ "step": 7500
479
+ },
480
+ {
481
+ "epoch": 2.43,
482
+ "eval_accuracy": 0.6547725816784112,
483
+ "eval_f1_score": 0.6425914825215071,
484
+ "eval_loss": 1.1453956365585327,
485
+ "eval_runtime": 79.2343,
486
+ "eval_samples_per_second": 197.011,
487
+ "eval_steps_per_second": 2.057,
488
+ "step": 7500
489
+ },
490
+ {
491
+ "epoch": 2.51,
492
+ "learning_rate": 1.943507588532884e-05,
493
+ "loss": 0.9023,
494
+ "step": 7750
495
+ },
496
+ {
497
+ "epoch": 2.51,
498
+ "eval_accuracy": 0.6542600896860986,
499
+ "eval_f1_score": 0.6438243504000527,
500
+ "eval_loss": 1.1429415941238403,
501
+ "eval_runtime": 79.2597,
502
+ "eval_samples_per_second": 196.947,
503
+ "eval_steps_per_second": 2.057,
504
+ "step": 7750
505
+ },
506
+ {
507
+ "epoch": 2.59,
508
+ "learning_rate": 1.8381112984822936e-05,
509
+ "loss": 0.8791,
510
+ "step": 8000
511
+ },
512
+ {
513
+ "epoch": 2.59,
514
+ "eval_accuracy": 0.6572069186418962,
515
+ "eval_f1_score": 0.647070357907258,
516
+ "eval_loss": 1.1421723365783691,
517
+ "eval_runtime": 79.5074,
518
+ "eval_samples_per_second": 196.334,
519
+ "eval_steps_per_second": 2.05,
520
+ "step": 8000
521
+ },
522
+ {
523
+ "epoch": 2.67,
524
+ "learning_rate": 1.7327150084317033e-05,
525
+ "loss": 0.8971,
526
+ "step": 8250
527
+ },
528
+ {
529
+ "epoch": 2.67,
530
+ "eval_accuracy": 0.6581678411274824,
531
+ "eval_f1_score": 0.6474305978141667,
532
+ "eval_loss": 1.1367805004119873,
533
+ "eval_runtime": 79.1975,
534
+ "eval_samples_per_second": 197.102,
535
+ "eval_steps_per_second": 2.058,
536
+ "step": 8250
537
+ },
538
+ {
539
+ "epoch": 2.75,
540
+ "learning_rate": 1.627318718381113e-05,
541
+ "loss": 0.8885,
542
+ "step": 8500
543
+ },
544
+ {
545
+ "epoch": 2.75,
546
+ "eval_accuracy": 0.6608584240871236,
547
+ "eval_f1_score": 0.6487602589453412,
548
+ "eval_loss": 1.122239351272583,
549
+ "eval_runtime": 79.2042,
550
+ "eval_samples_per_second": 197.085,
551
+ "eval_steps_per_second": 2.058,
552
+ "step": 8500
553
+ },
554
+ {
555
+ "epoch": 2.83,
556
+ "learning_rate": 1.5219224283305228e-05,
557
+ "loss": 0.8745,
558
+ "step": 8750
559
+ },
560
+ {
561
+ "epoch": 2.83,
562
+ "eval_accuracy": 0.6579756566303652,
563
+ "eval_f1_score": 0.6483741964856444,
564
+ "eval_loss": 1.1291719675064087,
565
+ "eval_runtime": 79.4248,
566
+ "eval_samples_per_second": 196.538,
567
+ "eval_steps_per_second": 2.052,
568
+ "step": 8750
569
+ },
570
+ {
571
+ "epoch": 2.91,
572
+ "learning_rate": 1.4165261382799327e-05,
573
+ "loss": 0.8837,
574
+ "step": 9000
575
+ },
576
+ {
577
+ "epoch": 2.91,
578
+ "eval_accuracy": 0.6574631646380525,
579
+ "eval_f1_score": 0.64725568770933,
580
+ "eval_loss": 1.1298834085464478,
581
+ "eval_runtime": 79.278,
582
+ "eval_samples_per_second": 196.902,
583
+ "eval_steps_per_second": 2.056,
584
+ "step": 9000
585
+ },
586
+ {
587
+ "epoch": 2.99,
588
+ "learning_rate": 1.3111298482293424e-05,
589
+ "loss": 0.8604,
590
+ "step": 9250
591
+ },
592
+ {
593
+ "epoch": 2.99,
594
+ "eval_accuracy": 0.6556694426649584,
595
+ "eval_f1_score": 0.6459143709441728,
596
+ "eval_loss": 1.1267211437225342,
597
+ "eval_runtime": 79.3015,
598
+ "eval_samples_per_second": 196.844,
599
+ "eval_steps_per_second": 2.055,
600
+ "step": 9250
601
+ },
602
+ {
603
+ "epoch": 3.07,
604
+ "learning_rate": 1.205733558178752e-05,
605
+ "loss": 0.754,
606
+ "step": 9500
607
+ },
608
+ {
609
+ "epoch": 3.07,
610
+ "eval_accuracy": 0.657847533632287,
611
+ "eval_f1_score": 0.6475765832075672,
612
+ "eval_loss": 1.1378750801086426,
613
+ "eval_runtime": 79.3879,
614
+ "eval_samples_per_second": 196.63,
615
+ "eval_steps_per_second": 2.053,
616
+ "step": 9500
617
+ },
618
+ {
619
+ "epoch": 3.16,
620
+ "learning_rate": 1.1003372681281619e-05,
621
+ "loss": 0.7371,
622
+ "step": 9750
623
+ },
624
+ {
625
+ "epoch": 3.16,
626
+ "eval_accuracy": 0.658744394618834,
627
+ "eval_f1_score": 0.648976579094328,
628
+ "eval_loss": 1.1473287343978882,
629
+ "eval_runtime": 79.2492,
630
+ "eval_samples_per_second": 196.974,
631
+ "eval_steps_per_second": 2.057,
632
+ "step": 9750
633
+ },
634
+ {
635
+ "epoch": 3.24,
636
+ "learning_rate": 9.949409780775717e-06,
637
+ "loss": 0.7418,
638
+ "step": 10000
639
+ },
640
+ {
641
+ "epoch": 3.24,
642
+ "eval_accuracy": 0.6577194106342088,
643
+ "eval_f1_score": 0.6481259620512635,
644
+ "eval_loss": 1.1429522037506104,
645
+ "eval_runtime": 79.2653,
646
+ "eval_samples_per_second": 196.934,
647
+ "eval_steps_per_second": 2.056,
648
+ "step": 10000
649
+ },
650
+ {
651
+ "epoch": 3.32,
652
+ "learning_rate": 8.895446880269814e-06,
653
+ "loss": 0.7478,
654
+ "step": 10250
655
+ },
656
+ {
657
+ "epoch": 3.32,
658
+ "eval_accuracy": 0.6581678411274824,
659
+ "eval_f1_score": 0.6480298727343113,
660
+ "eval_loss": 1.147200107574463,
661
+ "eval_runtime": 79.3533,
662
+ "eval_samples_per_second": 196.715,
663
+ "eval_steps_per_second": 2.054,
664
+ "step": 10250
665
+ },
666
+ {
667
+ "epoch": 3.4,
668
+ "learning_rate": 7.841483979763913e-06,
669
+ "loss": 0.7214,
670
+ "step": 10500
671
+ },
672
+ {
673
+ "epoch": 3.4,
674
+ "eval_accuracy": 0.6593209481101858,
675
+ "eval_f1_score": 0.6487748216018238,
676
+ "eval_loss": 1.1483687162399292,
677
+ "eval_runtime": 79.2151,
678
+ "eval_samples_per_second": 197.058,
679
+ "eval_steps_per_second": 2.058,
680
+ "step": 10500
681
+ },
682
+ {
683
+ "epoch": 3.48,
684
+ "learning_rate": 6.787521079258011e-06,
685
+ "loss": 0.7378,
686
+ "step": 10750
687
+ },
688
+ {
689
+ "epoch": 3.48,
690
+ "eval_accuracy": 0.6613709160794362,
691
+ "eval_f1_score": 0.6515662944938931,
692
+ "eval_loss": 1.1435444355010986,
693
+ "eval_runtime": 79.1971,
694
+ "eval_samples_per_second": 197.103,
695
+ "eval_steps_per_second": 2.058,
696
+ "step": 10750
697
+ },
698
+ {
699
+ "epoch": 3.56,
700
+ "learning_rate": 5.733558178752108e-06,
701
+ "loss": 0.746,
702
+ "step": 11000
703
+ },
704
+ {
705
+ "epoch": 3.56,
706
+ "eval_accuracy": 0.6582959641255606,
707
+ "eval_f1_score": 0.649775469900882,
708
+ "eval_loss": 1.142891764640808,
709
+ "eval_runtime": 79.304,
710
+ "eval_samples_per_second": 196.838,
711
+ "eval_steps_per_second": 2.055,
712
+ "step": 11000
713
+ },
714
+ {
715
+ "epoch": 3.64,
716
+ "learning_rate": 4.6795952782462055e-06,
717
+ "loss": 0.7355,
718
+ "step": 11250
719
+ },
720
+ {
721
+ "epoch": 3.64,
722
+ "eval_accuracy": 0.6614349775784754,
723
+ "eval_f1_score": 0.6523276061496733,
724
+ "eval_loss": 1.1420927047729492,
725
+ "eval_runtime": 79.4328,
726
+ "eval_samples_per_second": 196.518,
727
+ "eval_steps_per_second": 2.052,
728
+ "step": 11250
729
+ },
730
+ {
731
+ "epoch": 3.72,
732
+ "learning_rate": 3.625632377740304e-06,
733
+ "loss": 0.7207,
734
+ "step": 11500
735
+ },
736
+ {
737
+ "epoch": 3.72,
738
+ "eval_accuracy": 0.6618193465727098,
739
+ "eval_f1_score": 0.6531447427234552,
740
+ "eval_loss": 1.1392197608947754,
741
+ "eval_runtime": 79.3578,
742
+ "eval_samples_per_second": 196.704,
743
+ "eval_steps_per_second": 2.054,
744
+ "step": 11500
745
+ }
746
+ ],
747
+ "max_steps": 12360,
748
+ "num_train_epochs": 4,
749
+ "total_flos": 1.4666146788722688e+17,
750
+ "trial_name": null,
751
+ "trial_params": null
752
+ }