SerdarHelli commited on
Commit
2a3559e
·
1 Parent(s): 9e9423c

ThyroidTumorClassification

Browse files
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "epoch": 74.89,
3
- "eval_accuracy": 0.8,
4
- "eval_loss": 0.5911628603935242,
5
- "eval_runtime": 0.4617,
6
- "eval_samples_per_second": 151.624,
7
- "eval_steps_per_second": 6.498,
8
- "total_flos": 5.2149900813992755e+17,
9
- "train_loss": 0.47956085205078125,
10
- "train_runtime": 368.1446,
11
- "train_samples_per_second": 56.432,
12
- "train_steps_per_second": 0.407
13
  }
 
1
  {
2
+ "epoch": 149.89,
3
+ "eval_f1": 0.9032258064516129,
4
+ "eval_loss": 0.45062127709388733,
5
+ "eval_runtime": 0.5727,
6
+ "eval_samples_per_second": 122.228,
7
+ "eval_steps_per_second": 5.238,
8
+ "total_flos": 1.0435256966870508e+18,
9
+ "train_loss": 0.3680222670237223,
10
+ "train_runtime": 877.204,
11
+ "train_samples_per_second": 47.366,
12
+ "train_steps_per_second": 0.342
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 74.89,
3
- "eval_accuracy": 0.8,
4
- "eval_loss": 0.5911628603935242,
5
- "eval_runtime": 0.4617,
6
- "eval_samples_per_second": 151.624,
7
- "eval_steps_per_second": 6.498
8
  }
 
1
  {
2
+ "epoch": 149.89,
3
+ "eval_f1": 0.9032258064516129,
4
+ "eval_loss": 0.45062127709388733,
5
+ "eval_runtime": 0.5727,
6
+ "eval_samples_per_second": 122.228,
7
+ "eval_steps_per_second": 5.238
8
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:99c3b3104392b943a286ff7bca287f28d3567158c999717a61dd2cd9114628a1
3
  size 111347349
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1111386620231e1358294477e6b15819e1f4b775f95505cb21a90993de59864
3
  size 111347349
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 74.89,
3
- "total_flos": 5.2149900813992755e+17,
4
- "train_loss": 0.47956085205078125,
5
- "train_runtime": 368.1446,
6
- "train_samples_per_second": 56.432,
7
- "train_steps_per_second": 0.407
8
  }
 
1
  {
2
+ "epoch": 149.89,
3
+ "total_flos": 1.0435256966870508e+18,
4
+ "train_loss": 0.3680222670237223,
5
+ "train_runtime": 877.204,
6
+ "train_samples_per_second": 47.366,
7
+ "train_steps_per_second": 0.342
8
  }
trainer_state.json CHANGED
@@ -1,736 +1,1447 @@
1
  {
2
- "best_metric": 0.8,
3
- "best_model_checkpoint": "Cvt-finetuned-thyroid/checkpoint-14",
4
- "epoch": 74.88888888888889,
5
- "global_step": 150,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.89,
12
- "eval_accuracy": 0.6,
13
- "eval_loss": 0.6705443263053894,
14
- "eval_runtime": 0.462,
15
- "eval_samples_per_second": 151.523,
16
- "eval_steps_per_second": 6.494,
17
  "step": 2
18
  },
19
  {
20
  "epoch": 1.89,
21
- "eval_accuracy": 0.6285714285714286,
22
- "eval_loss": 0.665534257888794,
23
- "eval_runtime": 0.4493,
24
- "eval_samples_per_second": 155.788,
25
- "eval_steps_per_second": 6.677,
26
  "step": 4
27
  },
28
  {
29
  "epoch": 2.89,
30
- "eval_accuracy": 0.6714285714285714,
31
- "eval_loss": 0.6565709710121155,
32
- "eval_runtime": 0.4608,
33
- "eval_samples_per_second": 151.906,
34
- "eval_steps_per_second": 6.51,
35
  "step": 6
36
  },
37
  {
38
  "epoch": 3.89,
39
- "eval_accuracy": 0.7428571428571429,
40
- "eval_loss": 0.643831729888916,
41
- "eval_runtime": 0.4726,
42
- "eval_samples_per_second": 148.132,
43
- "eval_steps_per_second": 6.349,
44
  "step": 8
45
  },
46
  {
47
  "epoch": 4.89,
48
- "eval_accuracy": 0.7571428571428571,
49
- "eval_loss": 0.6282520890235901,
50
- "eval_runtime": 0.4465,
51
- "eval_samples_per_second": 156.767,
52
- "eval_steps_per_second": 6.719,
53
  "step": 10
54
  },
55
  {
56
  "epoch": 5.89,
57
- "eval_accuracy": 0.7857142857142857,
58
- "eval_loss": 0.6104965209960938,
59
- "eval_runtime": 0.448,
60
- "eval_samples_per_second": 156.239,
61
- "eval_steps_per_second": 6.696,
62
  "step": 12
63
  },
64
  {
65
  "epoch": 6.89,
66
- "eval_accuracy": 0.8,
67
- "eval_loss": 0.5911628603935242,
68
- "eval_runtime": 0.4464,
69
- "eval_samples_per_second": 156.804,
70
- "eval_steps_per_second": 6.72,
71
  "step": 14
72
  },
73
  {
74
  "epoch": 7.89,
75
- "eval_accuracy": 0.8,
76
- "eval_loss": 0.5708973407745361,
77
- "eval_runtime": 0.4463,
78
- "eval_samples_per_second": 156.848,
79
- "eval_steps_per_second": 6.722,
80
  "step": 16
81
  },
82
  {
83
  "epoch": 8.89,
84
- "eval_accuracy": 0.8,
85
- "eval_loss": 0.5531989336013794,
86
- "eval_runtime": 0.439,
87
- "eval_samples_per_second": 159.442,
88
- "eval_steps_per_second": 6.833,
89
  "step": 18
90
  },
91
  {
92
  "epoch": 9.89,
93
- "eval_accuracy": 0.8,
94
- "eval_loss": 0.5385797619819641,
95
- "eval_runtime": 0.4602,
96
- "eval_samples_per_second": 152.109,
97
- "eval_steps_per_second": 6.519,
98
  "step": 20
99
  },
100
  {
101
  "epoch": 10.89,
102
- "eval_accuracy": 0.8,
103
- "eval_loss": 0.5270153880119324,
104
- "eval_runtime": 0.4401,
105
- "eval_samples_per_second": 159.068,
106
- "eval_steps_per_second": 6.817,
107
  "step": 22
108
  },
109
  {
110
  "epoch": 11.89,
111
- "eval_accuracy": 0.8,
112
- "eval_loss": 0.5181472301483154,
113
- "eval_runtime": 0.4475,
114
- "eval_samples_per_second": 156.407,
115
- "eval_steps_per_second": 6.703,
116
  "step": 24
117
  },
118
  {
119
  "epoch": 12.44,
120
- "learning_rate": 9.25925925925926e-06,
121
- "loss": 0.6298,
122
  "step": 25
123
  },
124
  {
125
  "epoch": 12.89,
126
- "eval_accuracy": 0.8,
127
- "eval_loss": 0.5123368501663208,
128
- "eval_runtime": 0.4347,
129
- "eval_samples_per_second": 161.016,
130
- "eval_steps_per_second": 6.901,
131
  "step": 26
132
  },
133
  {
134
  "epoch": 13.89,
135
- "eval_accuracy": 0.8,
136
- "eval_loss": 0.5075790286064148,
137
- "eval_runtime": 0.4347,
138
- "eval_samples_per_second": 161.026,
139
- "eval_steps_per_second": 6.901,
140
  "step": 28
141
  },
142
  {
143
  "epoch": 14.89,
144
- "eval_accuracy": 0.8,
145
- "eval_loss": 0.5036987066268921,
146
- "eval_runtime": 0.4334,
147
- "eval_samples_per_second": 161.512,
148
- "eval_steps_per_second": 6.922,
149
  "step": 30
150
  },
151
  {
152
  "epoch": 15.89,
153
- "eval_accuracy": 0.8,
154
- "eval_loss": 0.5011651515960693,
155
- "eval_runtime": 0.4565,
156
- "eval_samples_per_second": 153.34,
157
- "eval_steps_per_second": 6.572,
158
  "step": 32
159
  },
160
  {
161
  "epoch": 16.89,
162
- "eval_accuracy": 0.8,
163
- "eval_loss": 0.498863160610199,
164
- "eval_runtime": 0.4641,
165
- "eval_samples_per_second": 150.834,
166
- "eval_steps_per_second": 6.464,
167
  "step": 34
168
  },
169
  {
170
  "epoch": 17.89,
171
- "eval_accuracy": 0.8,
172
- "eval_loss": 0.497054785490036,
173
- "eval_runtime": 0.44,
174
- "eval_samples_per_second": 159.098,
175
- "eval_steps_per_second": 6.818,
176
  "step": 36
177
  },
178
  {
179
  "epoch": 18.89,
180
- "eval_accuracy": 0.8,
181
- "eval_loss": 0.4955058991909027,
182
- "eval_runtime": 0.4462,
183
- "eval_samples_per_second": 156.867,
184
- "eval_steps_per_second": 6.723,
185
  "step": 38
186
  },
187
  {
188
  "epoch": 19.89,
189
- "eval_accuracy": 0.8,
190
- "eval_loss": 0.4940197467803955,
191
- "eval_runtime": 0.4416,
192
- "eval_samples_per_second": 158.514,
193
- "eval_steps_per_second": 6.793,
194
  "step": 40
195
  },
196
  {
197
  "epoch": 20.89,
198
- "eval_accuracy": 0.8,
199
- "eval_loss": 0.49268171191215515,
200
- "eval_runtime": 0.4504,
201
- "eval_samples_per_second": 155.428,
202
- "eval_steps_per_second": 6.661,
203
  "step": 42
204
  },
205
  {
206
  "epoch": 21.89,
207
- "eval_accuracy": 0.8,
208
- "eval_loss": 0.49133333563804626,
209
- "eval_runtime": 0.4327,
210
- "eval_samples_per_second": 161.788,
211
- "eval_steps_per_second": 6.934,
212
  "step": 44
213
  },
214
  {
215
  "epoch": 22.89,
216
- "eval_accuracy": 0.8,
217
- "eval_loss": 0.489890456199646,
218
- "eval_runtime": 0.4454,
219
- "eval_samples_per_second": 157.155,
220
- "eval_steps_per_second": 6.735,
221
  "step": 46
222
  },
223
  {
224
  "epoch": 23.89,
225
- "eval_accuracy": 0.8,
226
- "eval_loss": 0.48838457465171814,
227
- "eval_runtime": 0.4388,
228
- "eval_samples_per_second": 159.525,
229
- "eval_steps_per_second": 6.837,
230
  "step": 48
231
  },
232
  {
233
  "epoch": 24.89,
234
- "learning_rate": 7.4074074074074075e-06,
235
- "loss": 0.4947,
236
  "step": 50
237
  },
238
  {
239
  "epoch": 24.89,
240
- "eval_accuracy": 0.8,
241
- "eval_loss": 0.4868685007095337,
242
- "eval_runtime": 0.44,
243
- "eval_samples_per_second": 159.08,
244
- "eval_steps_per_second": 6.818,
245
  "step": 50
246
  },
247
  {
248
  "epoch": 25.89,
249
- "eval_accuracy": 0.8,
250
- "eval_loss": 0.48541581630706787,
251
- "eval_runtime": 0.4379,
252
- "eval_samples_per_second": 159.843,
253
- "eval_steps_per_second": 6.85,
254
  "step": 52
255
  },
256
  {
257
  "epoch": 26.89,
258
- "eval_accuracy": 0.8,
259
- "eval_loss": 0.4839009940624237,
260
- "eval_runtime": 0.4366,
261
- "eval_samples_per_second": 160.316,
262
- "eval_steps_per_second": 6.871,
263
  "step": 54
264
  },
265
  {
266
  "epoch": 27.89,
267
- "eval_accuracy": 0.8,
268
- "eval_loss": 0.482656329870224,
269
- "eval_runtime": 0.4434,
270
- "eval_samples_per_second": 157.854,
271
- "eval_steps_per_second": 6.765,
272
  "step": 56
273
  },
274
  {
275
  "epoch": 28.89,
276
- "eval_accuracy": 0.8,
277
- "eval_loss": 0.4816107749938965,
278
- "eval_runtime": 0.4432,
279
- "eval_samples_per_second": 157.931,
280
- "eval_steps_per_second": 6.768,
281
  "step": 58
282
  },
283
  {
284
  "epoch": 29.89,
285
- "eval_accuracy": 0.8,
286
- "eval_loss": 0.4808529019355774,
287
- "eval_runtime": 0.4449,
288
- "eval_samples_per_second": 157.322,
289
- "eval_steps_per_second": 6.742,
290
  "step": 60
291
  },
292
  {
293
  "epoch": 30.89,
294
- "eval_accuracy": 0.8,
295
- "eval_loss": 0.4799829125404358,
296
- "eval_runtime": 0.4386,
297
- "eval_samples_per_second": 159.609,
298
- "eval_steps_per_second": 6.84,
299
  "step": 62
300
  },
301
  {
302
  "epoch": 31.89,
303
- "eval_accuracy": 0.8,
304
- "eval_loss": 0.479232519865036,
305
- "eval_runtime": 0.4533,
306
- "eval_samples_per_second": 154.433,
307
- "eval_steps_per_second": 6.619,
308
  "step": 64
309
  },
310
  {
311
  "epoch": 32.89,
312
- "eval_accuracy": 0.8,
313
- "eval_loss": 0.4783066213130951,
314
- "eval_runtime": 0.4417,
315
- "eval_samples_per_second": 158.47,
316
- "eval_steps_per_second": 6.792,
317
  "step": 66
318
  },
319
  {
320
  "epoch": 33.89,
321
- "eval_accuracy": 0.8,
322
- "eval_loss": 0.4775735139846802,
323
- "eval_runtime": 0.4472,
324
- "eval_samples_per_second": 156.531,
325
- "eval_steps_per_second": 6.708,
326
  "step": 68
327
  },
328
  {
329
  "epoch": 34.89,
330
- "eval_accuracy": 0.8,
331
- "eval_loss": 0.4768332839012146,
332
- "eval_runtime": 0.4342,
333
- "eval_samples_per_second": 161.213,
334
- "eval_steps_per_second": 6.909,
335
  "step": 70
336
  },
337
  {
338
  "epoch": 35.89,
339
- "eval_accuracy": 0.8,
340
- "eval_loss": 0.47618430852890015,
341
- "eval_runtime": 0.4501,
342
- "eval_samples_per_second": 155.536,
343
- "eval_steps_per_second": 6.666,
344
  "step": 72
345
  },
346
  {
347
  "epoch": 36.89,
348
- "eval_accuracy": 0.8,
349
- "eval_loss": 0.4756462574005127,
350
- "eval_runtime": 0.4411,
351
- "eval_samples_per_second": 158.688,
352
- "eval_steps_per_second": 6.801,
353
  "step": 74
354
  },
355
  {
356
  "epoch": 37.44,
357
- "learning_rate": 5.555555555555557e-06,
358
- "loss": 0.4688,
359
  "step": 75
360
  },
361
  {
362
  "epoch": 37.89,
363
- "eval_accuracy": 0.8,
364
- "eval_loss": 0.47506338357925415,
365
- "eval_runtime": 0.4697,
366
- "eval_samples_per_second": 149.047,
367
- "eval_steps_per_second": 6.388,
368
  "step": 76
369
  },
370
  {
371
  "epoch": 38.89,
372
- "eval_accuracy": 0.8,
373
- "eval_loss": 0.4744085669517517,
374
- "eval_runtime": 0.444,
375
- "eval_samples_per_second": 157.644,
376
- "eval_steps_per_second": 6.756,
377
  "step": 78
378
  },
379
  {
380
  "epoch": 39.89,
381
- "eval_accuracy": 0.8,
382
- "eval_loss": 0.4739179313182831,
383
- "eval_runtime": 0.4514,
384
- "eval_samples_per_second": 155.078,
385
- "eval_steps_per_second": 6.646,
386
  "step": 80
387
  },
388
  {
389
  "epoch": 40.89,
390
- "eval_accuracy": 0.8,
391
- "eval_loss": 0.4733032286167145,
392
- "eval_runtime": 0.4549,
393
- "eval_samples_per_second": 153.865,
394
- "eval_steps_per_second": 6.594,
395
  "step": 82
396
  },
397
  {
398
  "epoch": 41.89,
399
- "eval_accuracy": 0.8,
400
- "eval_loss": 0.4729520082473755,
401
- "eval_runtime": 0.4432,
402
- "eval_samples_per_second": 157.928,
403
- "eval_steps_per_second": 6.768,
404
  "step": 84
405
  },
406
  {
407
  "epoch": 42.89,
408
- "eval_accuracy": 0.8,
409
- "eval_loss": 0.4726985991001129,
410
- "eval_runtime": 0.4582,
411
- "eval_samples_per_second": 152.787,
412
- "eval_steps_per_second": 6.548,
413
  "step": 86
414
  },
415
  {
416
  "epoch": 43.89,
417
- "eval_accuracy": 0.8,
418
- "eval_loss": 0.4726087749004364,
419
- "eval_runtime": 0.4498,
420
- "eval_samples_per_second": 155.628,
421
- "eval_steps_per_second": 6.67,
422
  "step": 88
423
  },
424
  {
425
  "epoch": 44.89,
426
- "eval_accuracy": 0.8,
427
- "eval_loss": 0.47239962220191956,
428
- "eval_runtime": 0.4376,
429
- "eval_samples_per_second": 159.981,
430
- "eval_steps_per_second": 6.856,
431
  "step": 90
432
  },
433
  {
434
  "epoch": 45.89,
435
- "eval_accuracy": 0.8,
436
- "eval_loss": 0.4720509648323059,
437
- "eval_runtime": 0.4582,
438
- "eval_samples_per_second": 152.761,
439
- "eval_steps_per_second": 6.547,
440
  "step": 92
441
  },
442
  {
443
  "epoch": 46.89,
444
- "eval_accuracy": 0.8,
445
- "eval_loss": 0.47151511907577515,
446
- "eval_runtime": 0.4581,
447
- "eval_samples_per_second": 152.803,
448
- "eval_steps_per_second": 6.549,
449
  "step": 94
450
  },
451
  {
452
  "epoch": 47.89,
453
- "eval_accuracy": 0.8,
454
- "eval_loss": 0.4711145758628845,
455
- "eval_runtime": 0.4536,
456
- "eval_samples_per_second": 154.323,
457
- "eval_steps_per_second": 6.614,
458
  "step": 96
459
  },
460
  {
461
  "epoch": 48.89,
462
- "eval_accuracy": 0.8,
463
- "eval_loss": 0.47070595622062683,
464
- "eval_runtime": 0.4562,
465
- "eval_samples_per_second": 153.435,
466
- "eval_steps_per_second": 6.576,
467
  "step": 98
468
  },
469
  {
470
  "epoch": 49.89,
471
- "learning_rate": 3.7037037037037037e-06,
472
- "loss": 0.4378,
473
  "step": 100
474
  },
475
  {
476
  "epoch": 49.89,
477
- "eval_accuracy": 0.8,
478
- "eval_loss": 0.4701813757419586,
479
- "eval_runtime": 0.4473,
480
- "eval_samples_per_second": 156.496,
481
- "eval_steps_per_second": 6.707,
482
  "step": 100
483
  },
484
  {
485
  "epoch": 50.89,
486
- "eval_accuracy": 0.8,
487
- "eval_loss": 0.46980804204940796,
488
- "eval_runtime": 0.4344,
489
- "eval_samples_per_second": 161.135,
490
- "eval_steps_per_second": 6.906,
491
  "step": 102
492
  },
493
  {
494
  "epoch": 51.89,
495
- "eval_accuracy": 0.8,
496
- "eval_loss": 0.46947282552719116,
497
- "eval_runtime": 0.4638,
498
- "eval_samples_per_second": 150.921,
499
- "eval_steps_per_second": 6.468,
500
  "step": 104
501
  },
502
  {
503
  "epoch": 52.89,
504
- "eval_accuracy": 0.8,
505
- "eval_loss": 0.46929818391799927,
506
- "eval_runtime": 0.448,
507
- "eval_samples_per_second": 156.238,
508
- "eval_steps_per_second": 6.696,
509
  "step": 106
510
  },
511
  {
512
  "epoch": 53.89,
513
- "eval_accuracy": 0.8,
514
- "eval_loss": 0.4691893458366394,
515
- "eval_runtime": 0.4537,
516
- "eval_samples_per_second": 154.298,
517
- "eval_steps_per_second": 6.613,
518
  "step": 108
519
  },
520
  {
521
  "epoch": 54.89,
522
- "eval_accuracy": 0.8,
523
- "eval_loss": 0.46898388862609863,
524
- "eval_runtime": 0.4451,
525
- "eval_samples_per_second": 157.257,
526
- "eval_steps_per_second": 6.74,
527
  "step": 110
528
  },
529
  {
530
  "epoch": 55.89,
531
- "eval_accuracy": 0.8,
532
- "eval_loss": 0.46869638562202454,
533
- "eval_runtime": 0.4465,
534
- "eval_samples_per_second": 156.786,
535
- "eval_steps_per_second": 6.719,
536
  "step": 112
537
  },
538
  {
539
  "epoch": 56.89,
540
- "eval_accuracy": 0.8,
541
- "eval_loss": 0.4685034155845642,
542
- "eval_runtime": 0.4553,
543
- "eval_samples_per_second": 153.754,
544
- "eval_steps_per_second": 6.589,
545
  "step": 114
546
  },
547
  {
548
  "epoch": 57.89,
549
- "eval_accuracy": 0.8,
550
- "eval_loss": 0.46839845180511475,
551
- "eval_runtime": 0.4475,
552
- "eval_samples_per_second": 156.417,
553
- "eval_steps_per_second": 6.704,
554
  "step": 116
555
  },
556
  {
557
  "epoch": 58.89,
558
- "eval_accuracy": 0.8,
559
- "eval_loss": 0.46825066208839417,
560
- "eval_runtime": 0.446,
561
- "eval_samples_per_second": 156.968,
562
- "eval_steps_per_second": 6.727,
563
  "step": 118
564
  },
565
  {
566
  "epoch": 59.89,
567
- "eval_accuracy": 0.8,
568
- "eval_loss": 0.46806439757347107,
569
- "eval_runtime": 0.4552,
570
- "eval_samples_per_second": 153.787,
571
- "eval_steps_per_second": 6.591,
572
  "step": 120
573
  },
574
  {
575
  "epoch": 60.89,
576
- "eval_accuracy": 0.8,
577
- "eval_loss": 0.4679650366306305,
578
- "eval_runtime": 0.45,
579
- "eval_samples_per_second": 155.565,
580
- "eval_steps_per_second": 6.667,
581
  "step": 122
582
  },
583
  {
584
  "epoch": 61.89,
585
- "eval_accuracy": 0.8,
586
- "eval_loss": 0.4678630232810974,
587
- "eval_runtime": 0.4524,
588
- "eval_samples_per_second": 154.721,
589
- "eval_steps_per_second": 6.631,
590
  "step": 124
591
  },
592
  {
593
  "epoch": 62.44,
594
- "learning_rate": 1.8518518518518519e-06,
595
- "loss": 0.4274,
596
  "step": 125
597
  },
598
  {
599
  "epoch": 62.89,
600
- "eval_accuracy": 0.8,
601
- "eval_loss": 0.467722088098526,
602
- "eval_runtime": 0.4455,
603
- "eval_samples_per_second": 157.131,
604
- "eval_steps_per_second": 6.734,
605
  "step": 126
606
  },
607
  {
608
  "epoch": 63.89,
609
- "eval_accuracy": 0.8,
610
- "eval_loss": 0.4675312638282776,
611
- "eval_runtime": 0.4574,
612
- "eval_samples_per_second": 153.047,
613
- "eval_steps_per_second": 6.559,
614
  "step": 128
615
  },
616
  {
617
  "epoch": 64.89,
618
- "eval_accuracy": 0.8,
619
- "eval_loss": 0.46731725335121155,
620
- "eval_runtime": 0.4688,
621
- "eval_samples_per_second": 149.332,
622
- "eval_steps_per_second": 6.4,
623
  "step": 130
624
  },
625
  {
626
  "epoch": 65.89,
627
- "eval_accuracy": 0.8,
628
- "eval_loss": 0.467153936624527,
629
- "eval_runtime": 0.4613,
630
- "eval_samples_per_second": 151.749,
631
- "eval_steps_per_second": 6.504,
632
  "step": 132
633
  },
634
  {
635
  "epoch": 66.89,
636
- "eval_accuracy": 0.8,
637
- "eval_loss": 0.46700313687324524,
638
- "eval_runtime": 0.4516,
639
- "eval_samples_per_second": 155.016,
640
- "eval_steps_per_second": 6.644,
641
  "step": 134
642
  },
643
  {
644
  "epoch": 67.89,
645
- "eval_accuracy": 0.8,
646
- "eval_loss": 0.46686962246894836,
647
- "eval_runtime": 0.4449,
648
- "eval_samples_per_second": 157.335,
649
- "eval_steps_per_second": 6.743,
650
  "step": 136
651
  },
652
  {
653
  "epoch": 68.89,
654
- "eval_accuracy": 0.8,
655
- "eval_loss": 0.46674275398254395,
656
- "eval_runtime": 0.4512,
657
- "eval_samples_per_second": 155.137,
658
- "eval_steps_per_second": 6.649,
659
  "step": 138
660
  },
661
  {
662
  "epoch": 69.89,
663
- "eval_accuracy": 0.8,
664
- "eval_loss": 0.4666597545146942,
665
- "eval_runtime": 0.4478,
666
- "eval_samples_per_second": 156.329,
667
- "eval_steps_per_second": 6.7,
668
  "step": 140
669
  },
670
  {
671
  "epoch": 70.89,
672
- "eval_accuracy": 0.8,
673
- "eval_loss": 0.4665738344192505,
674
- "eval_runtime": 0.4419,
675
- "eval_samples_per_second": 158.423,
676
- "eval_steps_per_second": 6.79,
677
  "step": 142
678
  },
679
  {
680
  "epoch": 71.89,
681
- "eval_accuracy": 0.8,
682
- "eval_loss": 0.46650615334510803,
683
- "eval_runtime": 0.45,
684
- "eval_samples_per_second": 155.562,
685
- "eval_steps_per_second": 6.667,
686
  "step": 144
687
  },
688
  {
689
  "epoch": 72.89,
690
- "eval_accuracy": 0.8,
691
- "eval_loss": 0.4664507806301117,
692
- "eval_runtime": 0.4559,
693
- "eval_samples_per_second": 153.528,
694
- "eval_steps_per_second": 6.58,
695
  "step": 146
696
  },
697
  {
698
  "epoch": 73.89,
699
- "eval_accuracy": 0.8,
700
- "eval_loss": 0.46641966700553894,
701
- "eval_runtime": 0.4546,
702
- "eval_samples_per_second": 153.981,
703
- "eval_steps_per_second": 6.599,
704
  "step": 148
705
  },
706
  {
707
  "epoch": 74.89,
708
- "learning_rate": 0.0,
709
- "loss": 0.4187,
710
  "step": 150
711
  },
712
  {
713
  "epoch": 74.89,
714
- "eval_accuracy": 0.8,
715
- "eval_loss": 0.4664078950881958,
716
- "eval_runtime": 0.444,
717
- "eval_samples_per_second": 157.667,
718
- "eval_steps_per_second": 6.757,
719
  "step": 150
720
  },
721
  {
722
- "epoch": 74.89,
723
- "step": 150,
724
- "total_flos": 5.2149900813992755e+17,
725
- "train_loss": 0.47956085205078125,
726
- "train_runtime": 368.1446,
727
- "train_samples_per_second": 56.432,
728
- "train_steps_per_second": 0.407
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
729
  }
730
  ],
731
- "max_steps": 150,
732
- "num_train_epochs": 75,
733
- "total_flos": 5.2149900813992755e+17,
734
  "trial_name": null,
735
  "trial_params": null
736
  }
 
1
  {
2
+ "best_metric": 0.9032258064516129,
3
+ "best_model_checkpoint": "Cvt-finetuned-thyroid/checkpoint-218",
4
+ "epoch": 149.88888888888889,
5
+ "global_step": 300,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.89,
12
+ "eval_f1": 0.8474576271186439,
13
+ "eval_loss": 0.6517891883850098,
14
+ "eval_runtime": 0.5116,
15
+ "eval_samples_per_second": 136.828,
16
+ "eval_steps_per_second": 5.864,
17
  "step": 2
18
  },
19
  {
20
  "epoch": 1.89,
21
+ "eval_f1": 0.8474576271186439,
22
+ "eval_loss": 0.6492317318916321,
23
+ "eval_runtime": 0.4952,
24
+ "eval_samples_per_second": 141.346,
25
+ "eval_steps_per_second": 6.058,
26
  "step": 4
27
  },
28
  {
29
  "epoch": 2.89,
30
+ "eval_f1": 0.8474576271186439,
31
+ "eval_loss": 0.6447046995162964,
32
+ "eval_runtime": 0.6963,
33
+ "eval_samples_per_second": 100.538,
34
+ "eval_steps_per_second": 4.309,
35
  "step": 6
36
  },
37
  {
38
  "epoch": 3.89,
39
+ "eval_f1": 0.8333333333333334,
40
+ "eval_loss": 0.6384778022766113,
41
+ "eval_runtime": 0.5,
42
+ "eval_samples_per_second": 140.006,
43
+ "eval_steps_per_second": 6.0,
44
  "step": 8
45
  },
46
  {
47
  "epoch": 4.89,
48
+ "eval_f1": 0.8524590163934426,
49
+ "eval_loss": 0.6307393312454224,
50
+ "eval_runtime": 0.5113,
51
+ "eval_samples_per_second": 136.904,
52
+ "eval_steps_per_second": 5.867,
53
  "step": 10
54
  },
55
  {
56
  "epoch": 5.89,
57
+ "eval_f1": 0.8709677419354839,
58
+ "eval_loss": 0.621659517288208,
59
+ "eval_runtime": 0.5005,
60
+ "eval_samples_per_second": 139.863,
61
+ "eval_steps_per_second": 5.994,
62
  "step": 12
63
  },
64
  {
65
  "epoch": 6.89,
66
+ "eval_f1": 0.8709677419354839,
67
+ "eval_loss": 0.6113398671150208,
68
+ "eval_runtime": 0.5076,
69
+ "eval_samples_per_second": 137.913,
70
+ "eval_steps_per_second": 5.911,
71
  "step": 14
72
  },
73
  {
74
  "epoch": 7.89,
75
+ "eval_f1": 0.8799999999999999,
76
+ "eval_loss": 0.5999341011047363,
77
+ "eval_runtime": 0.504,
78
+ "eval_samples_per_second": 138.893,
79
+ "eval_steps_per_second": 5.953,
80
  "step": 16
81
  },
82
  {
83
  "epoch": 8.89,
84
+ "eval_f1": 0.888888888888889,
85
+ "eval_loss": 0.587955117225647,
86
+ "eval_runtime": 0.5041,
87
+ "eval_samples_per_second": 138.872,
88
+ "eval_steps_per_second": 5.952,
89
  "step": 18
90
  },
91
  {
92
  "epoch": 9.89,
93
+ "eval_f1": 0.888888888888889,
94
+ "eval_loss": 0.5761495232582092,
95
+ "eval_runtime": 0.5087,
96
+ "eval_samples_per_second": 137.618,
97
+ "eval_steps_per_second": 5.898,
98
  "step": 20
99
  },
100
  {
101
  "epoch": 10.89,
102
+ "eval_f1": 0.888888888888889,
103
+ "eval_loss": 0.5640864372253418,
104
+ "eval_runtime": 0.5127,
105
+ "eval_samples_per_second": 136.532,
106
+ "eval_steps_per_second": 5.851,
107
  "step": 22
108
  },
109
  {
110
  "epoch": 11.89,
111
+ "eval_f1": 0.888888888888889,
112
+ "eval_loss": 0.5520942211151123,
113
+ "eval_runtime": 0.5104,
114
+ "eval_samples_per_second": 137.159,
115
+ "eval_steps_per_second": 5.878,
116
  "step": 24
117
  },
118
  {
119
  "epoch": 12.44,
120
+ "learning_rate": 8.333333333333334e-06,
121
+ "loss": 0.6685,
122
  "step": 25
123
  },
124
  {
125
  "epoch": 12.89,
126
+ "eval_f1": 0.888888888888889,
127
+ "eval_loss": 0.5406573414802551,
128
+ "eval_runtime": 0.5165,
129
+ "eval_samples_per_second": 135.523,
130
+ "eval_steps_per_second": 5.808,
131
  "step": 26
132
  },
133
  {
134
  "epoch": 13.89,
135
+ "eval_f1": 0.888888888888889,
136
+ "eval_loss": 0.5298128128051758,
137
+ "eval_runtime": 0.5102,
138
+ "eval_samples_per_second": 137.199,
139
+ "eval_steps_per_second": 5.88,
140
  "step": 28
141
  },
142
  {
143
  "epoch": 14.89,
144
+ "eval_f1": 0.888888888888889,
145
+ "eval_loss": 0.5198087096214294,
146
+ "eval_runtime": 0.51,
147
+ "eval_samples_per_second": 137.243,
148
+ "eval_steps_per_second": 5.882,
149
  "step": 30
150
  },
151
  {
152
  "epoch": 15.89,
153
+ "eval_f1": 0.888888888888889,
154
+ "eval_loss": 0.5109697580337524,
155
+ "eval_runtime": 0.5205,
156
+ "eval_samples_per_second": 134.496,
157
+ "eval_steps_per_second": 5.764,
158
  "step": 32
159
  },
160
  {
161
  "epoch": 16.89,
162
+ "eval_f1": 0.888888888888889,
163
+ "eval_loss": 0.5045211911201477,
164
+ "eval_runtime": 0.514,
165
+ "eval_samples_per_second": 136.181,
166
+ "eval_steps_per_second": 5.836,
167
  "step": 34
168
  },
169
  {
170
  "epoch": 17.89,
171
+ "eval_f1": 0.888888888888889,
172
+ "eval_loss": 0.4998997747898102,
173
+ "eval_runtime": 0.5294,
174
+ "eval_samples_per_second": 132.214,
175
+ "eval_steps_per_second": 5.666,
176
  "step": 36
177
  },
178
  {
179
  "epoch": 18.89,
180
+ "eval_f1": 0.888888888888889,
181
+ "eval_loss": 0.4965072572231293,
182
+ "eval_runtime": 0.5087,
183
+ "eval_samples_per_second": 137.597,
184
+ "eval_steps_per_second": 5.897,
185
  "step": 38
186
  },
187
  {
188
  "epoch": 19.89,
189
+ "eval_f1": 0.888888888888889,
190
+ "eval_loss": 0.493766188621521,
191
+ "eval_runtime": 0.5102,
192
+ "eval_samples_per_second": 137.214,
193
+ "eval_steps_per_second": 5.881,
194
  "step": 40
195
  },
196
  {
197
  "epoch": 20.89,
198
+ "eval_f1": 0.888888888888889,
199
+ "eval_loss": 0.49165278673171997,
200
+ "eval_runtime": 0.5078,
201
+ "eval_samples_per_second": 137.845,
202
+ "eval_steps_per_second": 5.908,
203
  "step": 42
204
  },
205
  {
206
  "epoch": 21.89,
207
+ "eval_f1": 0.888888888888889,
208
+ "eval_loss": 0.4898264706134796,
209
+ "eval_runtime": 0.5144,
210
+ "eval_samples_per_second": 136.069,
211
+ "eval_steps_per_second": 5.832,
212
  "step": 44
213
  },
214
  {
215
  "epoch": 22.89,
216
+ "eval_f1": 0.888888888888889,
217
+ "eval_loss": 0.4883803129196167,
218
+ "eval_runtime": 0.5035,
219
+ "eval_samples_per_second": 139.037,
220
+ "eval_steps_per_second": 5.959,
221
  "step": 46
222
  },
223
  {
224
  "epoch": 23.89,
225
+ "eval_f1": 0.888888888888889,
226
+ "eval_loss": 0.4872118830680847,
227
+ "eval_runtime": 0.5072,
228
+ "eval_samples_per_second": 138.01,
229
+ "eval_steps_per_second": 5.915,
230
  "step": 48
231
  },
232
  {
233
  "epoch": 24.89,
234
+ "learning_rate": 9.25925925925926e-06,
235
+ "loss": 0.5044,
236
  "step": 50
237
  },
238
  {
239
  "epoch": 24.89,
240
+ "eval_f1": 0.888888888888889,
241
+ "eval_loss": 0.4861142933368683,
242
+ "eval_runtime": 0.5085,
243
+ "eval_samples_per_second": 137.671,
244
+ "eval_steps_per_second": 5.9,
245
  "step": 50
246
  },
247
  {
248
  "epoch": 25.89,
249
+ "eval_f1": 0.888888888888889,
250
+ "eval_loss": 0.4849891662597656,
251
+ "eval_runtime": 0.5374,
252
+ "eval_samples_per_second": 130.249,
253
+ "eval_steps_per_second": 5.582,
254
  "step": 52
255
  },
256
  {
257
  "epoch": 26.89,
258
+ "eval_f1": 0.888888888888889,
259
+ "eval_loss": 0.483755499124527,
260
+ "eval_runtime": 0.51,
261
+ "eval_samples_per_second": 137.255,
262
+ "eval_steps_per_second": 5.882,
263
  "step": 54
264
  },
265
  {
266
  "epoch": 27.89,
267
+ "eval_f1": 0.888888888888889,
268
+ "eval_loss": 0.4827323257923126,
269
+ "eval_runtime": 0.5139,
270
+ "eval_samples_per_second": 136.21,
271
+ "eval_steps_per_second": 5.838,
272
  "step": 56
273
  },
274
  {
275
  "epoch": 28.89,
276
+ "eval_f1": 0.888888888888889,
277
+ "eval_loss": 0.4820977449417114,
278
+ "eval_runtime": 0.5117,
279
+ "eval_samples_per_second": 136.794,
280
+ "eval_steps_per_second": 5.863,
281
  "step": 58
282
  },
283
  {
284
  "epoch": 29.89,
285
+ "eval_f1": 0.888888888888889,
286
+ "eval_loss": 0.4817977249622345,
287
+ "eval_runtime": 0.5176,
288
+ "eval_samples_per_second": 135.231,
289
+ "eval_steps_per_second": 5.796,
290
  "step": 60
291
  },
292
  {
293
  "epoch": 30.89,
294
+ "eval_f1": 0.888888888888889,
295
+ "eval_loss": 0.4813471734523773,
296
+ "eval_runtime": 0.5076,
297
+ "eval_samples_per_second": 137.901,
298
+ "eval_steps_per_second": 5.91,
299
  "step": 62
300
  },
301
  {
302
  "epoch": 31.89,
303
+ "eval_f1": 0.888888888888889,
304
+ "eval_loss": 0.4812641739845276,
305
+ "eval_runtime": 0.5176,
306
+ "eval_samples_per_second": 135.242,
307
+ "eval_steps_per_second": 5.796,
308
  "step": 64
309
  },
310
  {
311
  "epoch": 32.89,
312
+ "eval_f1": 0.888888888888889,
313
+ "eval_loss": 0.48070254921913147,
314
+ "eval_runtime": 0.5078,
315
+ "eval_samples_per_second": 137.854,
316
+ "eval_steps_per_second": 5.908,
317
  "step": 66
318
  },
319
  {
320
  "epoch": 33.89,
321
+ "eval_f1": 0.888888888888889,
322
+ "eval_loss": 0.48028674721717834,
323
+ "eval_runtime": 0.5091,
324
+ "eval_samples_per_second": 137.488,
325
+ "eval_steps_per_second": 5.892,
326
  "step": 68
327
  },
328
  {
329
  "epoch": 34.89,
330
+ "eval_f1": 0.888888888888889,
331
+ "eval_loss": 0.4801904261112213,
332
+ "eval_runtime": 0.5047,
333
+ "eval_samples_per_second": 138.694,
334
+ "eval_steps_per_second": 5.944,
335
  "step": 70
336
  },
337
  {
338
  "epoch": 35.89,
339
+ "eval_f1": 0.888888888888889,
340
+ "eval_loss": 0.4799834191799164,
341
+ "eval_runtime": 0.5083,
342
+ "eval_samples_per_second": 137.704,
343
+ "eval_steps_per_second": 5.902,
344
  "step": 72
345
  },
346
  {
347
  "epoch": 36.89,
348
+ "eval_f1": 0.888888888888889,
349
+ "eval_loss": 0.47955021262168884,
350
+ "eval_runtime": 0.5047,
351
+ "eval_samples_per_second": 138.688,
352
+ "eval_steps_per_second": 5.944,
353
  "step": 74
354
  },
355
  {
356
  "epoch": 37.44,
357
+ "learning_rate": 8.333333333333334e-06,
358
+ "loss": 0.4434,
359
  "step": 75
360
  },
361
  {
362
  "epoch": 37.89,
363
+ "eval_f1": 0.888888888888889,
364
+ "eval_loss": 0.47954249382019043,
365
+ "eval_runtime": 0.5098,
366
+ "eval_samples_per_second": 137.317,
367
+ "eval_steps_per_second": 5.885,
368
  "step": 76
369
  },
370
  {
371
  "epoch": 38.89,
372
+ "eval_f1": 0.888888888888889,
373
+ "eval_loss": 0.47896233201026917,
374
+ "eval_runtime": 0.5167,
375
+ "eval_samples_per_second": 135.481,
376
+ "eval_steps_per_second": 5.806,
377
  "step": 78
378
  },
379
  {
380
  "epoch": 39.89,
381
+ "eval_f1": 0.888888888888889,
382
+ "eval_loss": 0.4780120849609375,
383
+ "eval_runtime": 0.5174,
384
+ "eval_samples_per_second": 135.289,
385
+ "eval_steps_per_second": 5.798,
386
  "step": 80
387
  },
388
  {
389
  "epoch": 40.89,
390
+ "eval_f1": 0.888888888888889,
391
+ "eval_loss": 0.4772844612598419,
392
+ "eval_runtime": 0.5162,
393
+ "eval_samples_per_second": 135.603,
394
+ "eval_steps_per_second": 5.812,
395
  "step": 82
396
  },
397
  {
398
  "epoch": 41.89,
399
+ "eval_f1": 0.888888888888889,
400
+ "eval_loss": 0.47639530897140503,
401
+ "eval_runtime": 0.5186,
402
+ "eval_samples_per_second": 134.98,
403
+ "eval_steps_per_second": 5.785,
404
  "step": 84
405
  },
406
  {
407
  "epoch": 42.89,
408
+ "eval_f1": 0.888888888888889,
409
+ "eval_loss": 0.47554805874824524,
410
+ "eval_runtime": 0.5172,
411
+ "eval_samples_per_second": 135.355,
412
+ "eval_steps_per_second": 5.801,
413
  "step": 86
414
  },
415
  {
416
  "epoch": 43.89,
417
+ "eval_f1": 0.888888888888889,
418
+ "eval_loss": 0.4745935797691345,
419
+ "eval_runtime": 0.514,
420
+ "eval_samples_per_second": 136.19,
421
+ "eval_steps_per_second": 5.837,
422
  "step": 88
423
  },
424
  {
425
  "epoch": 44.89,
426
+ "eval_f1": 0.888888888888889,
427
+ "eval_loss": 0.4736994504928589,
428
+ "eval_runtime": 0.5108,
429
+ "eval_samples_per_second": 137.03,
430
+ "eval_steps_per_second": 5.873,
431
  "step": 90
432
  },
433
  {
434
  "epoch": 45.89,
435
+ "eval_f1": 0.888888888888889,
436
+ "eval_loss": 0.4725368618965149,
437
+ "eval_runtime": 0.5067,
438
+ "eval_samples_per_second": 138.137,
439
+ "eval_steps_per_second": 5.92,
440
  "step": 92
441
  },
442
  {
443
  "epoch": 46.89,
444
+ "eval_f1": 0.888888888888889,
445
+ "eval_loss": 0.47122785449028015,
446
+ "eval_runtime": 0.5187,
447
+ "eval_samples_per_second": 134.95,
448
+ "eval_steps_per_second": 5.784,
449
  "step": 94
450
  },
451
  {
452
  "epoch": 47.89,
453
+ "eval_f1": 0.888888888888889,
454
+ "eval_loss": 0.4697120189666748,
455
+ "eval_runtime": 0.5106,
456
+ "eval_samples_per_second": 137.089,
457
+ "eval_steps_per_second": 5.875,
458
  "step": 96
459
  },
460
  {
461
  "epoch": 48.89,
462
+ "eval_f1": 0.888888888888889,
463
+ "eval_loss": 0.4686456322669983,
464
+ "eval_runtime": 0.5071,
465
+ "eval_samples_per_second": 138.045,
466
+ "eval_steps_per_second": 5.916,
467
  "step": 98
468
  },
469
  {
470
  "epoch": 49.89,
471
+ "learning_rate": 7.4074074074074075e-06,
472
+ "loss": 0.4,
473
  "step": 100
474
  },
475
  {
476
  "epoch": 49.89,
477
+ "eval_f1": 0.888888888888889,
478
+ "eval_loss": 0.46755868196487427,
479
+ "eval_runtime": 0.5094,
480
+ "eval_samples_per_second": 137.403,
481
+ "eval_steps_per_second": 5.889,
482
  "step": 100
483
  },
484
  {
485
  "epoch": 50.89,
486
+ "eval_f1": 0.888888888888889,
487
+ "eval_loss": 0.4666549265384674,
488
+ "eval_runtime": 0.5092,
489
+ "eval_samples_per_second": 137.458,
490
+ "eval_steps_per_second": 5.891,
491
  "step": 102
492
  },
493
  {
494
  "epoch": 51.89,
495
+ "eval_f1": 0.888888888888889,
496
+ "eval_loss": 0.46638351678848267,
497
+ "eval_runtime": 0.5124,
498
+ "eval_samples_per_second": 136.625,
499
+ "eval_steps_per_second": 5.855,
500
  "step": 104
501
  },
502
  {
503
  "epoch": 52.89,
504
+ "eval_f1": 0.888888888888889,
505
+ "eval_loss": 0.4666298031806946,
506
+ "eval_runtime": 0.5135,
507
+ "eval_samples_per_second": 136.326,
508
+ "eval_steps_per_second": 5.843,
509
  "step": 106
510
  },
511
  {
512
  "epoch": 53.89,
513
+ "eval_f1": 0.888888888888889,
514
+ "eval_loss": 0.46660488843917847,
515
+ "eval_runtime": 0.5175,
516
+ "eval_samples_per_second": 135.261,
517
+ "eval_steps_per_second": 5.797,
518
  "step": 108
519
  },
520
  {
521
  "epoch": 54.89,
522
+ "eval_f1": 0.888888888888889,
523
+ "eval_loss": 0.4668794870376587,
524
+ "eval_runtime": 0.5009,
525
+ "eval_samples_per_second": 139.747,
526
+ "eval_steps_per_second": 5.989,
527
  "step": 110
528
  },
529
  {
530
  "epoch": 55.89,
531
+ "eval_f1": 0.888888888888889,
532
+ "eval_loss": 0.46678298711776733,
533
+ "eval_runtime": 0.5078,
534
+ "eval_samples_per_second": 137.844,
535
+ "eval_steps_per_second": 5.908,
536
  "step": 112
537
  },
538
  {
539
  "epoch": 56.89,
540
+ "eval_f1": 0.888888888888889,
541
+ "eval_loss": 0.46638283133506775,
542
+ "eval_runtime": 0.5157,
543
+ "eval_samples_per_second": 135.74,
544
+ "eval_steps_per_second": 5.817,
545
  "step": 114
546
  },
547
  {
548
  "epoch": 57.89,
549
+ "eval_f1": 0.888888888888889,
550
+ "eval_loss": 0.46611693501472473,
551
+ "eval_runtime": 0.5257,
552
+ "eval_samples_per_second": 133.154,
553
+ "eval_steps_per_second": 5.707,
554
  "step": 116
555
  },
556
  {
557
  "epoch": 58.89,
558
+ "eval_f1": 0.888888888888889,
559
+ "eval_loss": 0.4656626582145691,
560
+ "eval_runtime": 0.5108,
561
+ "eval_samples_per_second": 137.041,
562
+ "eval_steps_per_second": 5.873,
563
  "step": 118
564
  },
565
  {
566
  "epoch": 59.89,
567
+ "eval_f1": 0.888888888888889,
568
+ "eval_loss": 0.4653801918029785,
569
+ "eval_runtime": 0.5184,
570
+ "eval_samples_per_second": 135.019,
571
+ "eval_steps_per_second": 5.787,
572
  "step": 120
573
  },
574
  {
575
  "epoch": 60.89,
576
+ "eval_f1": 0.888888888888889,
577
+ "eval_loss": 0.46494531631469727,
578
+ "eval_runtime": 0.5265,
579
+ "eval_samples_per_second": 132.948,
580
+ "eval_steps_per_second": 5.698,
581
  "step": 122
582
  },
583
  {
584
  "epoch": 61.89,
585
+ "eval_f1": 0.888888888888889,
586
+ "eval_loss": 0.4643649458885193,
587
+ "eval_runtime": 0.5176,
588
+ "eval_samples_per_second": 135.236,
589
+ "eval_steps_per_second": 5.796,
590
  "step": 124
591
  },
592
  {
593
  "epoch": 62.44,
594
+ "learning_rate": 6.481481481481482e-06,
595
+ "loss": 0.3712,
596
  "step": 125
597
  },
598
  {
599
  "epoch": 62.89,
600
+ "eval_f1": 0.888888888888889,
601
+ "eval_loss": 0.46361738443374634,
602
+ "eval_runtime": 0.5177,
603
+ "eval_samples_per_second": 135.21,
604
+ "eval_steps_per_second": 5.795,
605
  "step": 126
606
  },
607
  {
608
  "epoch": 63.89,
609
+ "eval_f1": 0.888888888888889,
610
+ "eval_loss": 0.46249672770500183,
611
+ "eval_runtime": 0.5106,
612
+ "eval_samples_per_second": 137.097,
613
+ "eval_steps_per_second": 5.876,
614
  "step": 128
615
  },
616
  {
617
  "epoch": 64.89,
618
+ "eval_f1": 0.888888888888889,
619
+ "eval_loss": 0.4611242711544037,
620
+ "eval_runtime": 0.5134,
621
+ "eval_samples_per_second": 136.352,
622
+ "eval_steps_per_second": 5.844,
623
  "step": 130
624
  },
625
  {
626
  "epoch": 65.89,
627
+ "eval_f1": 0.888888888888889,
628
+ "eval_loss": 0.4598376154899597,
629
+ "eval_runtime": 0.5045,
630
+ "eval_samples_per_second": 138.759,
631
+ "eval_steps_per_second": 5.947,
632
  "step": 132
633
  },
634
  {
635
  "epoch": 66.89,
636
+ "eval_f1": 0.888888888888889,
637
+ "eval_loss": 0.45868563652038574,
638
+ "eval_runtime": 0.5061,
639
+ "eval_samples_per_second": 138.303,
640
+ "eval_steps_per_second": 5.927,
641
  "step": 134
642
  },
643
  {
644
  "epoch": 67.89,
645
+ "eval_f1": 0.888888888888889,
646
+ "eval_loss": 0.4579010605812073,
647
+ "eval_runtime": 0.5154,
648
+ "eval_samples_per_second": 135.819,
649
+ "eval_steps_per_second": 5.821,
650
  "step": 136
651
  },
652
  {
653
  "epoch": 68.89,
654
+ "eval_f1": 0.888888888888889,
655
+ "eval_loss": 0.4574340879917145,
656
+ "eval_runtime": 0.5107,
657
+ "eval_samples_per_second": 137.062,
658
+ "eval_steps_per_second": 5.874,
659
  "step": 138
660
  },
661
  {
662
  "epoch": 69.89,
663
+ "eval_f1": 0.888888888888889,
664
+ "eval_loss": 0.45691609382629395,
665
+ "eval_runtime": 0.5186,
666
+ "eval_samples_per_second": 134.99,
667
+ "eval_steps_per_second": 5.785,
668
  "step": 140
669
  },
670
  {
671
  "epoch": 70.89,
672
+ "eval_f1": 0.888888888888889,
673
+ "eval_loss": 0.45680859684944153,
674
+ "eval_runtime": 0.5147,
675
+ "eval_samples_per_second": 136.006,
676
+ "eval_steps_per_second": 5.829,
677
  "step": 142
678
  },
679
  {
680
  "epoch": 71.89,
681
+ "eval_f1": 0.888888888888889,
682
+ "eval_loss": 0.4564048945903778,
683
+ "eval_runtime": 0.5157,
684
+ "eval_samples_per_second": 135.741,
685
+ "eval_steps_per_second": 5.817,
686
  "step": 144
687
  },
688
  {
689
  "epoch": 72.89,
690
+ "eval_f1": 0.888888888888889,
691
+ "eval_loss": 0.4561113119125366,
692
+ "eval_runtime": 0.5039,
693
+ "eval_samples_per_second": 138.917,
694
+ "eval_steps_per_second": 5.954,
695
  "step": 146
696
  },
697
  {
698
  "epoch": 73.89,
699
+ "eval_f1": 0.888888888888889,
700
+ "eval_loss": 0.45622721314430237,
701
+ "eval_runtime": 0.514,
702
+ "eval_samples_per_second": 136.175,
703
+ "eval_steps_per_second": 5.836,
704
  "step": 148
705
  },
706
  {
707
  "epoch": 74.89,
708
+ "learning_rate": 5.555555555555557e-06,
709
+ "loss": 0.3419,
710
  "step": 150
711
  },
712
  {
713
  "epoch": 74.89,
714
+ "eval_f1": 0.888888888888889,
715
+ "eval_loss": 0.4559585452079773,
716
+ "eval_runtime": 0.5107,
717
+ "eval_samples_per_second": 137.074,
718
+ "eval_steps_per_second": 5.875,
719
  "step": 150
720
  },
721
  {
722
+ "epoch": 75.89,
723
+ "eval_f1": 0.888888888888889,
724
+ "eval_loss": 0.4556769132614136,
725
+ "eval_runtime": 0.5134,
726
+ "eval_samples_per_second": 136.339,
727
+ "eval_steps_per_second": 5.843,
728
+ "step": 152
729
+ },
730
+ {
731
+ "epoch": 76.89,
732
+ "eval_f1": 0.888888888888889,
733
+ "eval_loss": 0.45586857199668884,
734
+ "eval_runtime": 0.5119,
735
+ "eval_samples_per_second": 136.757,
736
+ "eval_steps_per_second": 5.861,
737
+ "step": 154
738
+ },
739
+ {
740
+ "epoch": 77.89,
741
+ "eval_f1": 0.888888888888889,
742
+ "eval_loss": 0.45571208000183105,
743
+ "eval_runtime": 0.5116,
744
+ "eval_samples_per_second": 136.818,
745
+ "eval_steps_per_second": 5.864,
746
+ "step": 156
747
+ },
748
+ {
749
+ "epoch": 78.89,
750
+ "eval_f1": 0.888888888888889,
751
+ "eval_loss": 0.4558698832988739,
752
+ "eval_runtime": 0.5082,
753
+ "eval_samples_per_second": 137.744,
754
+ "eval_steps_per_second": 5.903,
755
+ "step": 158
756
+ },
757
+ {
758
+ "epoch": 79.89,
759
+ "eval_f1": 0.888888888888889,
760
+ "eval_loss": 0.4559585154056549,
761
+ "eval_runtime": 0.5127,
762
+ "eval_samples_per_second": 136.542,
763
+ "eval_steps_per_second": 5.852,
764
+ "step": 160
765
+ },
766
+ {
767
+ "epoch": 80.89,
768
+ "eval_f1": 0.888888888888889,
769
+ "eval_loss": 0.4561022222042084,
770
+ "eval_runtime": 0.5136,
771
+ "eval_samples_per_second": 136.283,
772
+ "eval_steps_per_second": 5.841,
773
+ "step": 162
774
+ },
775
+ {
776
+ "epoch": 81.89,
777
+ "eval_f1": 0.888888888888889,
778
+ "eval_loss": 0.4561418294906616,
779
+ "eval_runtime": 0.5185,
780
+ "eval_samples_per_second": 135.002,
781
+ "eval_steps_per_second": 5.786,
782
+ "step": 164
783
+ },
784
+ {
785
+ "epoch": 82.89,
786
+ "eval_f1": 0.888888888888889,
787
+ "eval_loss": 0.45633062720298767,
788
+ "eval_runtime": 0.5583,
789
+ "eval_samples_per_second": 125.383,
790
+ "eval_steps_per_second": 5.374,
791
+ "step": 166
792
+ },
793
+ {
794
+ "epoch": 83.89,
795
+ "eval_f1": 0.896,
796
+ "eval_loss": 0.45609140396118164,
797
+ "eval_runtime": 0.5096,
798
+ "eval_samples_per_second": 137.376,
799
+ "eval_steps_per_second": 5.888,
800
+ "step": 168
801
+ },
802
+ {
803
+ "epoch": 84.89,
804
+ "eval_f1": 0.896,
805
+ "eval_loss": 0.4557640850543976,
806
+ "eval_runtime": 0.5089,
807
+ "eval_samples_per_second": 137.544,
808
+ "eval_steps_per_second": 5.895,
809
+ "step": 170
810
+ },
811
+ {
812
+ "epoch": 85.89,
813
+ "eval_f1": 0.896,
814
+ "eval_loss": 0.4552680552005768,
815
+ "eval_runtime": 0.5124,
816
+ "eval_samples_per_second": 136.601,
817
+ "eval_steps_per_second": 5.854,
818
+ "step": 172
819
+ },
820
+ {
821
+ "epoch": 86.89,
822
+ "eval_f1": 0.896,
823
+ "eval_loss": 0.45485004782676697,
824
+ "eval_runtime": 0.5155,
825
+ "eval_samples_per_second": 135.799,
826
+ "eval_steps_per_second": 5.82,
827
+ "step": 174
828
+ },
829
+ {
830
+ "epoch": 87.44,
831
+ "learning_rate": 4.62962962962963e-06,
832
+ "loss": 0.3221,
833
+ "step": 175
834
+ },
835
+ {
836
+ "epoch": 87.89,
837
+ "eval_f1": 0.896,
838
+ "eval_loss": 0.45451802015304565,
839
+ "eval_runtime": 0.5105,
840
+ "eval_samples_per_second": 137.109,
841
+ "eval_steps_per_second": 5.876,
842
+ "step": 176
843
+ },
844
+ {
845
+ "epoch": 88.89,
846
+ "eval_f1": 0.896,
847
+ "eval_loss": 0.4541896879673004,
848
+ "eval_runtime": 0.5149,
849
+ "eval_samples_per_second": 135.937,
850
+ "eval_steps_per_second": 5.826,
851
+ "step": 178
852
+ },
853
+ {
854
+ "epoch": 89.89,
855
+ "eval_f1": 0.896,
856
+ "eval_loss": 0.45373955368995667,
857
+ "eval_runtime": 0.5183,
858
+ "eval_samples_per_second": 135.051,
859
+ "eval_steps_per_second": 5.788,
860
+ "step": 180
861
+ },
862
+ {
863
+ "epoch": 90.89,
864
+ "eval_f1": 0.896,
865
+ "eval_loss": 0.4535920023918152,
866
+ "eval_runtime": 0.5109,
867
+ "eval_samples_per_second": 137.024,
868
+ "eval_steps_per_second": 5.872,
869
+ "step": 182
870
+ },
871
+ {
872
+ "epoch": 91.89,
873
+ "eval_f1": 0.896,
874
+ "eval_loss": 0.45348674058914185,
875
+ "eval_runtime": 0.5161,
876
+ "eval_samples_per_second": 135.628,
877
+ "eval_steps_per_second": 5.813,
878
+ "step": 184
879
+ },
880
+ {
881
+ "epoch": 92.89,
882
+ "eval_f1": 0.896,
883
+ "eval_loss": 0.45325374603271484,
884
+ "eval_runtime": 0.5128,
885
+ "eval_samples_per_second": 136.516,
886
+ "eval_steps_per_second": 5.851,
887
+ "step": 186
888
+ },
889
+ {
890
+ "epoch": 93.89,
891
+ "eval_f1": 0.896,
892
+ "eval_loss": 0.4530419111251831,
893
+ "eval_runtime": 0.5198,
894
+ "eval_samples_per_second": 134.677,
895
+ "eval_steps_per_second": 5.772,
896
+ "step": 188
897
+ },
898
+ {
899
+ "epoch": 94.89,
900
+ "eval_f1": 0.896,
901
+ "eval_loss": 0.452932745218277,
902
+ "eval_runtime": 0.5123,
903
+ "eval_samples_per_second": 136.651,
904
+ "eval_steps_per_second": 5.856,
905
+ "step": 190
906
+ },
907
+ {
908
+ "epoch": 95.89,
909
+ "eval_f1": 0.896,
910
+ "eval_loss": 0.45291921496391296,
911
+ "eval_runtime": 0.5108,
912
+ "eval_samples_per_second": 137.03,
913
+ "eval_steps_per_second": 5.873,
914
+ "step": 192
915
+ },
916
+ {
917
+ "epoch": 96.89,
918
+ "eval_f1": 0.896,
919
+ "eval_loss": 0.45280784368515015,
920
+ "eval_runtime": 0.5125,
921
+ "eval_samples_per_second": 136.577,
922
+ "eval_steps_per_second": 5.853,
923
+ "step": 194
924
+ },
925
+ {
926
+ "epoch": 97.89,
927
+ "eval_f1": 0.896,
928
+ "eval_loss": 0.4528333246707916,
929
+ "eval_runtime": 0.5077,
930
+ "eval_samples_per_second": 137.871,
931
+ "eval_steps_per_second": 5.909,
932
+ "step": 196
933
+ },
934
+ {
935
+ "epoch": 98.89,
936
+ "eval_f1": 0.896,
937
+ "eval_loss": 0.45277661085128784,
938
+ "eval_runtime": 0.508,
939
+ "eval_samples_per_second": 137.803,
940
+ "eval_steps_per_second": 5.906,
941
+ "step": 198
942
+ },
943
+ {
944
+ "epoch": 99.89,
945
+ "learning_rate": 3.7037037037037037e-06,
946
+ "loss": 0.297,
947
+ "step": 200
948
+ },
949
+ {
950
+ "epoch": 99.89,
951
+ "eval_f1": 0.896,
952
+ "eval_loss": 0.45276370644569397,
953
+ "eval_runtime": 0.5103,
954
+ "eval_samples_per_second": 137.186,
955
+ "eval_steps_per_second": 5.879,
956
+ "step": 200
957
+ },
958
+ {
959
+ "epoch": 100.89,
960
+ "eval_f1": 0.896,
961
+ "eval_loss": 0.4528166651725769,
962
+ "eval_runtime": 0.5087,
963
+ "eval_samples_per_second": 137.613,
964
+ "eval_steps_per_second": 5.898,
965
+ "step": 202
966
+ },
967
+ {
968
+ "epoch": 101.89,
969
+ "eval_f1": 0.896,
970
+ "eval_loss": 0.45277735590934753,
971
+ "eval_runtime": 0.5107,
972
+ "eval_samples_per_second": 137.056,
973
+ "eval_steps_per_second": 5.874,
974
+ "step": 204
975
+ },
976
+ {
977
+ "epoch": 102.89,
978
+ "eval_f1": 0.896,
979
+ "eval_loss": 0.4523409307003021,
980
+ "eval_runtime": 0.519,
981
+ "eval_samples_per_second": 134.862,
982
+ "eval_steps_per_second": 5.78,
983
+ "step": 206
984
+ },
985
+ {
986
+ "epoch": 103.89,
987
+ "eval_f1": 0.896,
988
+ "eval_loss": 0.4522373080253601,
989
+ "eval_runtime": 0.5234,
990
+ "eval_samples_per_second": 133.744,
991
+ "eval_steps_per_second": 5.732,
992
+ "step": 208
993
+ },
994
+ {
995
+ "epoch": 104.89,
996
+ "eval_f1": 0.896,
997
+ "eval_loss": 0.4519382417201996,
998
+ "eval_runtime": 0.5099,
999
+ "eval_samples_per_second": 137.277,
1000
+ "eval_steps_per_second": 5.883,
1001
+ "step": 210
1002
+ },
1003
+ {
1004
+ "epoch": 105.89,
1005
+ "eval_f1": 0.896,
1006
+ "eval_loss": 0.4515880048274994,
1007
+ "eval_runtime": 0.5111,
1008
+ "eval_samples_per_second": 136.947,
1009
+ "eval_steps_per_second": 5.869,
1010
+ "step": 212
1011
+ },
1012
+ {
1013
+ "epoch": 106.89,
1014
+ "eval_f1": 0.896,
1015
+ "eval_loss": 0.4515409767627716,
1016
+ "eval_runtime": 0.5212,
1017
+ "eval_samples_per_second": 134.295,
1018
+ "eval_steps_per_second": 5.755,
1019
+ "step": 214
1020
+ },
1021
+ {
1022
+ "epoch": 107.89,
1023
+ "eval_f1": 0.896,
1024
+ "eval_loss": 0.45120465755462646,
1025
+ "eval_runtime": 0.5234,
1026
+ "eval_samples_per_second": 133.737,
1027
+ "eval_steps_per_second": 5.732,
1028
+ "step": 216
1029
+ },
1030
+ {
1031
+ "epoch": 108.89,
1032
+ "eval_f1": 0.9032258064516129,
1033
+ "eval_loss": 0.45062127709388733,
1034
+ "eval_runtime": 0.5116,
1035
+ "eval_samples_per_second": 136.817,
1036
+ "eval_steps_per_second": 5.864,
1037
+ "step": 218
1038
+ },
1039
+ {
1040
+ "epoch": 109.89,
1041
+ "eval_f1": 0.9032258064516129,
1042
+ "eval_loss": 0.450234055519104,
1043
+ "eval_runtime": 0.5074,
1044
+ "eval_samples_per_second": 137.954,
1045
+ "eval_steps_per_second": 5.912,
1046
+ "step": 220
1047
+ },
1048
+ {
1049
+ "epoch": 110.89,
1050
+ "eval_f1": 0.9032258064516129,
1051
+ "eval_loss": 0.45027095079421997,
1052
+ "eval_runtime": 0.5078,
1053
+ "eval_samples_per_second": 137.84,
1054
+ "eval_steps_per_second": 5.907,
1055
+ "step": 222
1056
+ },
1057
+ {
1058
+ "epoch": 111.89,
1059
+ "eval_f1": 0.9032258064516129,
1060
+ "eval_loss": 0.4502160847187042,
1061
+ "eval_runtime": 0.5143,
1062
+ "eval_samples_per_second": 136.105,
1063
+ "eval_steps_per_second": 5.833,
1064
+ "step": 224
1065
+ },
1066
+ {
1067
+ "epoch": 112.44,
1068
+ "learning_rate": 2.7777777777777783e-06,
1069
+ "loss": 0.2809,
1070
+ "step": 225
1071
+ },
1072
+ {
1073
+ "epoch": 112.89,
1074
+ "eval_f1": 0.9032258064516129,
1075
+ "eval_loss": 0.45027267932891846,
1076
+ "eval_runtime": 0.5062,
1077
+ "eval_samples_per_second": 138.288,
1078
+ "eval_steps_per_second": 5.927,
1079
+ "step": 226
1080
+ },
1081
+ {
1082
+ "epoch": 113.89,
1083
+ "eval_f1": 0.9032258064516129,
1084
+ "eval_loss": 0.45001256465911865,
1085
+ "eval_runtime": 0.5176,
1086
+ "eval_samples_per_second": 135.243,
1087
+ "eval_steps_per_second": 5.796,
1088
+ "step": 228
1089
+ },
1090
+ {
1091
+ "epoch": 114.89,
1092
+ "eval_f1": 0.9032258064516129,
1093
+ "eval_loss": 0.44984087347984314,
1094
+ "eval_runtime": 0.5057,
1095
+ "eval_samples_per_second": 138.435,
1096
+ "eval_steps_per_second": 5.933,
1097
+ "step": 230
1098
+ },
1099
+ {
1100
+ "epoch": 115.89,
1101
+ "eval_f1": 0.9032258064516129,
1102
+ "eval_loss": 0.4496540129184723,
1103
+ "eval_runtime": 0.5087,
1104
+ "eval_samples_per_second": 137.605,
1105
+ "eval_steps_per_second": 5.897,
1106
+ "step": 232
1107
+ },
1108
+ {
1109
+ "epoch": 116.89,
1110
+ "eval_f1": 0.9032258064516129,
1111
+ "eval_loss": 0.4495893120765686,
1112
+ "eval_runtime": 0.5162,
1113
+ "eval_samples_per_second": 135.612,
1114
+ "eval_steps_per_second": 5.812,
1115
+ "step": 234
1116
+ },
1117
+ {
1118
+ "epoch": 117.89,
1119
+ "eval_f1": 0.9032258064516129,
1120
+ "eval_loss": 0.4495932459831238,
1121
+ "eval_runtime": 0.5162,
1122
+ "eval_samples_per_second": 135.596,
1123
+ "eval_steps_per_second": 5.811,
1124
+ "step": 236
1125
+ },
1126
+ {
1127
+ "epoch": 118.89,
1128
+ "eval_f1": 0.9032258064516129,
1129
+ "eval_loss": 0.4495743215084076,
1130
+ "eval_runtime": 0.5143,
1131
+ "eval_samples_per_second": 136.098,
1132
+ "eval_steps_per_second": 5.833,
1133
+ "step": 238
1134
+ },
1135
+ {
1136
+ "epoch": 119.89,
1137
+ "eval_f1": 0.9032258064516129,
1138
+ "eval_loss": 0.4495793282985687,
1139
+ "eval_runtime": 0.5201,
1140
+ "eval_samples_per_second": 134.594,
1141
+ "eval_steps_per_second": 5.768,
1142
+ "step": 240
1143
+ },
1144
+ {
1145
+ "epoch": 120.89,
1146
+ "eval_f1": 0.9032258064516129,
1147
+ "eval_loss": 0.44972047209739685,
1148
+ "eval_runtime": 0.517,
1149
+ "eval_samples_per_second": 135.393,
1150
+ "eval_steps_per_second": 5.803,
1151
+ "step": 242
1152
+ },
1153
+ {
1154
+ "epoch": 121.89,
1155
+ "eval_f1": 0.9032258064516129,
1156
+ "eval_loss": 0.4496006965637207,
1157
+ "eval_runtime": 0.5201,
1158
+ "eval_samples_per_second": 134.597,
1159
+ "eval_steps_per_second": 5.768,
1160
+ "step": 244
1161
+ },
1162
+ {
1163
+ "epoch": 122.89,
1164
+ "eval_f1": 0.9032258064516129,
1165
+ "eval_loss": 0.4495578706264496,
1166
+ "eval_runtime": 0.5087,
1167
+ "eval_samples_per_second": 137.594,
1168
+ "eval_steps_per_second": 5.897,
1169
+ "step": 246
1170
+ },
1171
+ {
1172
+ "epoch": 123.89,
1173
+ "eval_f1": 0.9032258064516129,
1174
+ "eval_loss": 0.44961830973625183,
1175
+ "eval_runtime": 0.5091,
1176
+ "eval_samples_per_second": 137.492,
1177
+ "eval_steps_per_second": 5.893,
1178
+ "step": 248
1179
+ },
1180
+ {
1181
+ "epoch": 124.89,
1182
+ "learning_rate": 1.8518518518518519e-06,
1183
+ "loss": 0.2717,
1184
+ "step": 250
1185
+ },
1186
+ {
1187
+ "epoch": 124.89,
1188
+ "eval_f1": 0.9032258064516129,
1189
+ "eval_loss": 0.4495493769645691,
1190
+ "eval_runtime": 0.5126,
1191
+ "eval_samples_per_second": 136.564,
1192
+ "eval_steps_per_second": 5.853,
1193
+ "step": 250
1194
+ },
1195
+ {
1196
+ "epoch": 125.89,
1197
+ "eval_f1": 0.9032258064516129,
1198
+ "eval_loss": 0.44953247904777527,
1199
+ "eval_runtime": 0.5166,
1200
+ "eval_samples_per_second": 135.508,
1201
+ "eval_steps_per_second": 5.807,
1202
+ "step": 252
1203
+ },
1204
+ {
1205
+ "epoch": 126.89,
1206
+ "eval_f1": 0.9032258064516129,
1207
+ "eval_loss": 0.44945859909057617,
1208
+ "eval_runtime": 0.5087,
1209
+ "eval_samples_per_second": 137.598,
1210
+ "eval_steps_per_second": 5.897,
1211
+ "step": 254
1212
+ },
1213
+ {
1214
+ "epoch": 127.89,
1215
+ "eval_f1": 0.9032258064516129,
1216
+ "eval_loss": 0.4493277072906494,
1217
+ "eval_runtime": 0.5129,
1218
+ "eval_samples_per_second": 136.49,
1219
+ "eval_steps_per_second": 5.85,
1220
+ "step": 256
1221
+ },
1222
+ {
1223
+ "epoch": 128.89,
1224
+ "eval_f1": 0.9032258064516129,
1225
+ "eval_loss": 0.4492017924785614,
1226
+ "eval_runtime": 0.5132,
1227
+ "eval_samples_per_second": 136.411,
1228
+ "eval_steps_per_second": 5.846,
1229
+ "step": 258
1230
+ },
1231
+ {
1232
+ "epoch": 129.89,
1233
+ "eval_f1": 0.9032258064516129,
1234
+ "eval_loss": 0.44910600781440735,
1235
+ "eval_runtime": 0.5158,
1236
+ "eval_samples_per_second": 135.724,
1237
+ "eval_steps_per_second": 5.817,
1238
+ "step": 260
1239
+ },
1240
+ {
1241
+ "epoch": 130.89,
1242
+ "eval_f1": 0.9032258064516129,
1243
+ "eval_loss": 0.44898271560668945,
1244
+ "eval_runtime": 0.5171,
1245
+ "eval_samples_per_second": 135.359,
1246
+ "eval_steps_per_second": 5.801,
1247
+ "step": 262
1248
+ },
1249
+ {
1250
+ "epoch": 131.89,
1251
+ "eval_f1": 0.9032258064516129,
1252
+ "eval_loss": 0.44882574677467346,
1253
+ "eval_runtime": 0.5104,
1254
+ "eval_samples_per_second": 137.159,
1255
+ "eval_steps_per_second": 5.878,
1256
+ "step": 264
1257
+ },
1258
+ {
1259
+ "epoch": 132.89,
1260
+ "eval_f1": 0.9032258064516129,
1261
+ "eval_loss": 0.44872909784317017,
1262
+ "eval_runtime": 0.5186,
1263
+ "eval_samples_per_second": 134.967,
1264
+ "eval_steps_per_second": 5.784,
1265
+ "step": 266
1266
+ },
1267
+ {
1268
+ "epoch": 133.89,
1269
+ "eval_f1": 0.9032258064516129,
1270
+ "eval_loss": 0.4485660791397095,
1271
+ "eval_runtime": 0.511,
1272
+ "eval_samples_per_second": 136.99,
1273
+ "eval_steps_per_second": 5.871,
1274
+ "step": 268
1275
+ },
1276
+ {
1277
+ "epoch": 134.89,
1278
+ "eval_f1": 0.9032258064516129,
1279
+ "eval_loss": 0.44834819436073303,
1280
+ "eval_runtime": 0.5105,
1281
+ "eval_samples_per_second": 137.112,
1282
+ "eval_steps_per_second": 5.876,
1283
+ "step": 270
1284
+ },
1285
+ {
1286
+ "epoch": 135.89,
1287
+ "eval_f1": 0.9032258064516129,
1288
+ "eval_loss": 0.448197603225708,
1289
+ "eval_runtime": 0.5111,
1290
+ "eval_samples_per_second": 136.954,
1291
+ "eval_steps_per_second": 5.869,
1292
+ "step": 272
1293
+ },
1294
+ {
1295
+ "epoch": 136.89,
1296
+ "eval_f1": 0.9032258064516129,
1297
+ "eval_loss": 0.4480949342250824,
1298
+ "eval_runtime": 0.5102,
1299
+ "eval_samples_per_second": 137.192,
1300
+ "eval_steps_per_second": 5.88,
1301
+ "step": 274
1302
+ },
1303
+ {
1304
+ "epoch": 137.44,
1305
+ "learning_rate": 9.259259259259259e-07,
1306
+ "loss": 0.2597,
1307
+ "step": 275
1308
+ },
1309
+ {
1310
+ "epoch": 137.89,
1311
+ "eval_f1": 0.9032258064516129,
1312
+ "eval_loss": 0.447935551404953,
1313
+ "eval_runtime": 0.5123,
1314
+ "eval_samples_per_second": 136.645,
1315
+ "eval_steps_per_second": 5.856,
1316
+ "step": 276
1317
+ },
1318
+ {
1319
+ "epoch": 138.89,
1320
+ "eval_f1": 0.9032258064516129,
1321
+ "eval_loss": 0.44773900508880615,
1322
+ "eval_runtime": 0.5066,
1323
+ "eval_samples_per_second": 138.167,
1324
+ "eval_steps_per_second": 5.921,
1325
+ "step": 278
1326
+ },
1327
+ {
1328
+ "epoch": 139.89,
1329
+ "eval_f1": 0.9032258064516129,
1330
+ "eval_loss": 0.44755300879478455,
1331
+ "eval_runtime": 0.5128,
1332
+ "eval_samples_per_second": 136.51,
1333
+ "eval_steps_per_second": 5.85,
1334
+ "step": 280
1335
+ },
1336
+ {
1337
+ "epoch": 140.89,
1338
+ "eval_f1": 0.9032258064516129,
1339
+ "eval_loss": 0.44738340377807617,
1340
+ "eval_runtime": 0.5088,
1341
+ "eval_samples_per_second": 137.582,
1342
+ "eval_steps_per_second": 5.896,
1343
+ "step": 282
1344
+ },
1345
+ {
1346
+ "epoch": 141.89,
1347
+ "eval_f1": 0.9032258064516129,
1348
+ "eval_loss": 0.4472770690917969,
1349
+ "eval_runtime": 0.5106,
1350
+ "eval_samples_per_second": 137.086,
1351
+ "eval_steps_per_second": 5.875,
1352
+ "step": 284
1353
+ },
1354
+ {
1355
+ "epoch": 142.89,
1356
+ "eval_f1": 0.9032258064516129,
1357
+ "eval_loss": 0.44719263911247253,
1358
+ "eval_runtime": 0.5111,
1359
+ "eval_samples_per_second": 136.971,
1360
+ "eval_steps_per_second": 5.87,
1361
+ "step": 286
1362
+ },
1363
+ {
1364
+ "epoch": 143.89,
1365
+ "eval_f1": 0.9032258064516129,
1366
+ "eval_loss": 0.44712573289871216,
1367
+ "eval_runtime": 0.519,
1368
+ "eval_samples_per_second": 134.866,
1369
+ "eval_steps_per_second": 5.78,
1370
+ "step": 288
1371
+ },
1372
+ {
1373
+ "epoch": 144.89,
1374
+ "eval_f1": 0.9032258064516129,
1375
+ "eval_loss": 0.44704240560531616,
1376
+ "eval_runtime": 0.5125,
1377
+ "eval_samples_per_second": 136.589,
1378
+ "eval_steps_per_second": 5.854,
1379
+ "step": 290
1380
+ },
1381
+ {
1382
+ "epoch": 145.89,
1383
+ "eval_f1": 0.9032258064516129,
1384
+ "eval_loss": 0.4469843804836273,
1385
+ "eval_runtime": 0.5194,
1386
+ "eval_samples_per_second": 134.777,
1387
+ "eval_steps_per_second": 5.776,
1388
+ "step": 292
1389
+ },
1390
+ {
1391
+ "epoch": 146.89,
1392
+ "eval_f1": 0.9032258064516129,
1393
+ "eval_loss": 0.44694092869758606,
1394
+ "eval_runtime": 0.5135,
1395
+ "eval_samples_per_second": 136.31,
1396
+ "eval_steps_per_second": 5.842,
1397
+ "step": 294
1398
+ },
1399
+ {
1400
+ "epoch": 147.89,
1401
+ "eval_f1": 0.9032258064516129,
1402
+ "eval_loss": 0.4469132423400879,
1403
+ "eval_runtime": 0.5162,
1404
+ "eval_samples_per_second": 135.603,
1405
+ "eval_steps_per_second": 5.812,
1406
+ "step": 296
1407
+ },
1408
+ {
1409
+ "epoch": 148.89,
1410
+ "eval_f1": 0.9032258064516129,
1411
+ "eval_loss": 0.44689762592315674,
1412
+ "eval_runtime": 0.514,
1413
+ "eval_samples_per_second": 136.176,
1414
+ "eval_steps_per_second": 5.836,
1415
+ "step": 298
1416
+ },
1417
+ {
1418
+ "epoch": 149.89,
1419
+ "learning_rate": 0.0,
1420
+ "loss": 0.2556,
1421
+ "step": 300
1422
+ },
1423
+ {
1424
+ "epoch": 149.89,
1425
+ "eval_f1": 0.9032258064516129,
1426
+ "eval_loss": 0.44689178466796875,
1427
+ "eval_runtime": 0.5161,
1428
+ "eval_samples_per_second": 135.631,
1429
+ "eval_steps_per_second": 5.813,
1430
+ "step": 300
1431
+ },
1432
+ {
1433
+ "epoch": 149.89,
1434
+ "step": 300,
1435
+ "total_flos": 1.0435256966870508e+18,
1436
+ "train_loss": 0.3680222670237223,
1437
+ "train_runtime": 877.204,
1438
+ "train_samples_per_second": 47.366,
1439
+ "train_steps_per_second": 0.342
1440
  }
1441
  ],
1442
+ "max_steps": 300,
1443
+ "num_train_epochs": 150,
1444
+ "total_flos": 1.0435256966870508e+18,
1445
  "trial_name": null,
1446
  "trial_params": null
1447
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:267ba35e6adf6eb6c01f73e02dabe9f9040c5d7d0b2abeece88e0e9424fc738b
3
  size 3311
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40d1617033179273eb9bed1677a63638edc820431ce580d577ec90836a4cfcb0
3
  size 3311