davanstrien HF staff commited on
Commit
94f719a
·
1 Parent(s): 3958293

End of training

Browse files
all_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
  "epoch": 40.0,
3
- "eval_f1": 0.9746835443037974,
4
- "eval_loss": 0.09489229321479797,
5
- "eval_runtime": 9.9057,
6
- "eval_samples_per_second": 7.975,
7
- "eval_steps_per_second": 0.101,
8
- "train_loss": 0.17715629466942379,
9
- "train_runtime": 2286.0002,
10
- "train_samples_per_second": 7.804,
11
- "train_steps_per_second": 0.122
12
  }
 
1
  {
2
  "epoch": 40.0,
3
+ "eval_f1": 0.9873417721518988,
4
+ "eval_loss": 0.06278952211141586,
5
+ "eval_runtime": 8.9053,
6
+ "eval_samples_per_second": 8.871,
7
+ "eval_steps_per_second": 0.112,
8
+ "train_loss": 0.09473916946777276,
9
+ "train_runtime": 2533.8499,
10
+ "train_samples_per_second": 7.041,
11
+ "train_steps_per_second": 0.111
12
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 40.0,
3
- "eval_f1": 0.9746835443037974,
4
- "eval_loss": 0.09489229321479797,
5
- "eval_runtime": 9.9057,
6
- "eval_samples_per_second": 7.975,
7
- "eval_steps_per_second": 0.101
8
  }
 
1
  {
2
  "epoch": 40.0,
3
+ "eval_f1": 0.9873417721518988,
4
+ "eval_loss": 0.06278952211141586,
5
+ "eval_runtime": 8.9053,
6
+ "eval_samples_per_second": 8.871,
7
+ "eval_steps_per_second": 0.112
8
  }
runs/Dec07_14-05-13_4c8c9451709b/events.out.tfevents.1670424940.4c8c9451709b.490.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b8451272354caee02c3e6a72fdbb1f11ec5b8e5815bc8e25ab2189699e6229b
3
+ size 357
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "epoch": 40.0,
3
- "train_loss": 0.17715629466942379,
4
- "train_runtime": 2286.0002,
5
- "train_samples_per_second": 7.804,
6
- "train_steps_per_second": 0.122
7
  }
 
1
  {
2
  "epoch": 40.0,
3
+ "train_loss": 0.09473916946777276,
4
+ "train_runtime": 2533.8499,
5
+ "train_samples_per_second": 7.041,
6
+ "train_steps_per_second": 0.111
7
  }
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 0.09489229321479797,
3
- "best_model_checkpoint": "/vit-base-patch32-224-in21k/checkpoint-273",
4
  "epoch": 40.0,
5
  "global_step": 280,
6
  "is_hyper_param_search": false,
@@ -10,539 +10,539 @@
10
  {
11
  "epoch": 1.0,
12
  "eval_f1": 0.8607594936708861,
13
- "eval_loss": 0.45760300755500793,
14
- "eval_runtime": 9.802,
15
- "eval_samples_per_second": 8.06,
16
- "eval_steps_per_second": 0.102,
17
  "step": 7
18
  },
19
  {
20
  "epoch": 1.43,
21
  "learning_rate": 1.928571428571429e-05,
22
- "loss": 0.5021,
23
  "step": 10
24
  },
25
  {
26
  "epoch": 2.0,
27
  "eval_f1": 0.8607594936708861,
28
- "eval_loss": 0.39525437355041504,
29
- "eval_runtime": 9.734,
30
- "eval_samples_per_second": 8.116,
31
- "eval_steps_per_second": 0.103,
32
  "step": 14
33
  },
34
  {
35
  "epoch": 2.86,
36
  "learning_rate": 1.8571428571428575e-05,
37
- "loss": 0.3595,
38
  "step": 20
39
  },
40
  {
41
  "epoch": 3.0,
42
  "eval_f1": 0.8607594936708861,
43
- "eval_loss": 0.38087165355682373,
44
- "eval_runtime": 9.8033,
45
- "eval_samples_per_second": 8.059,
46
- "eval_steps_per_second": 0.102,
47
  "step": 21
48
  },
49
  {
50
  "epoch": 4.0,
51
  "eval_f1": 0.8607594936708861,
52
- "eval_loss": 0.32862257957458496,
53
- "eval_runtime": 9.7791,
54
- "eval_samples_per_second": 8.078,
55
- "eval_steps_per_second": 0.102,
56
  "step": 28
57
  },
58
  {
59
  "epoch": 4.29,
60
  "learning_rate": 1.785714285714286e-05,
61
- "loss": 0.3009,
62
  "step": 30
63
  },
64
  {
65
  "epoch": 5.0,
66
  "eval_f1": 0.8607594936708861,
67
- "eval_loss": 0.29453742504119873,
68
- "eval_runtime": 9.9491,
69
- "eval_samples_per_second": 7.94,
70
- "eval_steps_per_second": 0.101,
71
  "step": 35
72
  },
73
  {
74
  "epoch": 5.71,
75
  "learning_rate": 1.7142857142857142e-05,
76
- "loss": 0.2843,
77
  "step": 40
78
  },
79
  {
80
  "epoch": 6.0,
81
  "eval_f1": 0.8607594936708861,
82
- "eval_loss": 0.35278087854385376,
83
- "eval_runtime": 9.8405,
84
- "eval_samples_per_second": 8.028,
85
- "eval_steps_per_second": 0.102,
86
  "step": 42
87
  },
88
  {
89
  "epoch": 7.0,
90
- "eval_f1": 0.8607594936708861,
91
- "eval_loss": 0.23452825844287872,
92
- "eval_runtime": 9.7839,
93
- "eval_samples_per_second": 8.074,
94
- "eval_steps_per_second": 0.102,
95
  "step": 49
96
  },
97
  {
98
  "epoch": 7.14,
99
  "learning_rate": 1.642857142857143e-05,
100
- "loss": 0.266,
101
  "step": 50
102
  },
103
  {
104
  "epoch": 8.0,
105
- "eval_f1": 0.8607594936708861,
106
- "eval_loss": 0.24986670911312103,
107
- "eval_runtime": 9.8907,
108
- "eval_samples_per_second": 7.987,
109
- "eval_steps_per_second": 0.101,
110
  "step": 56
111
  },
112
  {
113
  "epoch": 8.57,
114
  "learning_rate": 1.5714285714285715e-05,
115
- "loss": 0.222,
116
  "step": 60
117
  },
118
  {
119
  "epoch": 9.0,
120
- "eval_f1": 0.8607594936708861,
121
- "eval_loss": 0.25441065430641174,
122
- "eval_runtime": 9.8794,
123
- "eval_samples_per_second": 7.996,
124
- "eval_steps_per_second": 0.101,
125
  "step": 63
126
  },
127
  {
128
  "epoch": 10.0,
129
  "learning_rate": 1.5000000000000002e-05,
130
- "loss": 0.2018,
131
  "step": 70
132
  },
133
  {
134
  "epoch": 10.0,
135
- "eval_f1": 0.8607594936708861,
136
- "eval_loss": 0.19540712237358093,
137
- "eval_runtime": 10.3358,
138
- "eval_samples_per_second": 7.643,
139
- "eval_steps_per_second": 0.097,
140
  "step": 70
141
  },
142
  {
143
  "epoch": 11.0,
144
- "eval_f1": 0.8607594936708861,
145
- "eval_loss": 0.23508581519126892,
146
- "eval_runtime": 9.7761,
147
- "eval_samples_per_second": 8.081,
148
- "eval_steps_per_second": 0.102,
149
  "step": 77
150
  },
151
  {
152
  "epoch": 11.43,
153
  "learning_rate": 1.4285714285714287e-05,
154
- "loss": 0.1948,
155
  "step": 80
156
  },
157
  {
158
  "epoch": 12.0,
159
- "eval_f1": 0.8607594936708861,
160
- "eval_loss": 0.17053687572479248,
161
- "eval_runtime": 9.7433,
162
- "eval_samples_per_second": 8.108,
163
- "eval_steps_per_second": 0.103,
164
  "step": 84
165
  },
166
  {
167
  "epoch": 12.86,
168
  "learning_rate": 1.3571428571428574e-05,
169
- "loss": 0.2053,
170
  "step": 90
171
  },
172
  {
173
  "epoch": 13.0,
174
- "eval_f1": 0.8734177215189873,
175
- "eval_loss": 0.16247014701366425,
176
- "eval_runtime": 9.7199,
177
- "eval_samples_per_second": 8.128,
178
- "eval_steps_per_second": 0.103,
179
  "step": 91
180
  },
181
  {
182
  "epoch": 14.0,
183
- "eval_f1": 0.9367088607594937,
184
- "eval_loss": 0.17189449071884155,
185
- "eval_runtime": 9.7734,
186
- "eval_samples_per_second": 8.083,
187
- "eval_steps_per_second": 0.102,
188
  "step": 98
189
  },
190
  {
191
  "epoch": 14.29,
192
  "learning_rate": 1.2857142857142859e-05,
193
- "loss": 0.1729,
194
  "step": 100
195
  },
196
  {
197
  "epoch": 15.0,
198
- "eval_f1": 0.9367088607594937,
199
- "eval_loss": 0.1488722860813141,
200
- "eval_runtime": 9.7192,
201
- "eval_samples_per_second": 8.128,
202
- "eval_steps_per_second": 0.103,
203
  "step": 105
204
  },
205
  {
206
  "epoch": 15.71,
207
  "learning_rate": 1.2142857142857142e-05,
208
- "loss": 0.1535,
209
  "step": 110
210
  },
211
  {
212
  "epoch": 16.0,
213
- "eval_f1": 0.9493670886075949,
214
- "eval_loss": 0.14498455822467804,
215
- "eval_runtime": 9.7322,
216
- "eval_samples_per_second": 8.117,
217
- "eval_steps_per_second": 0.103,
218
  "step": 112
219
  },
220
  {
221
  "epoch": 17.0,
222
- "eval_f1": 0.9493670886075949,
223
- "eval_loss": 0.1749649941921234,
224
- "eval_runtime": 9.718,
225
- "eval_samples_per_second": 8.129,
226
- "eval_steps_per_second": 0.103,
227
  "step": 119
228
  },
229
  {
230
  "epoch": 17.14,
231
  "learning_rate": 1.1428571428571429e-05,
232
- "loss": 0.1492,
233
  "step": 120
234
  },
235
  {
236
  "epoch": 18.0,
237
- "eval_f1": 0.9493670886075949,
238
- "eval_loss": 0.15143541991710663,
239
- "eval_runtime": 9.7273,
240
- "eval_samples_per_second": 8.121,
241
- "eval_steps_per_second": 0.103,
242
  "step": 126
243
  },
244
  {
245
  "epoch": 18.57,
246
  "learning_rate": 1.0714285714285714e-05,
247
- "loss": 0.1349,
248
  "step": 130
249
  },
250
  {
251
  "epoch": 19.0,
252
- "eval_f1": 0.9620253164556962,
253
- "eval_loss": 0.1304464191198349,
254
- "eval_runtime": 9.7442,
255
- "eval_samples_per_second": 8.107,
256
- "eval_steps_per_second": 0.103,
257
  "step": 133
258
  },
259
  {
260
  "epoch": 20.0,
261
  "learning_rate": 1e-05,
262
- "loss": 0.1538,
263
  "step": 140
264
  },
265
  {
266
  "epoch": 20.0,
267
- "eval_f1": 0.9620253164556962,
268
- "eval_loss": 0.12909165024757385,
269
- "eval_runtime": 9.7157,
270
- "eval_samples_per_second": 8.131,
271
- "eval_steps_per_second": 0.103,
272
  "step": 140
273
  },
274
  {
275
  "epoch": 21.0,
276
- "eval_f1": 0.9620253164556962,
277
- "eval_loss": 0.13061794638633728,
278
- "eval_runtime": 9.7515,
279
- "eval_samples_per_second": 8.101,
280
- "eval_steps_per_second": 0.103,
281
  "step": 147
282
  },
283
  {
284
  "epoch": 21.43,
285
  "learning_rate": 9.285714285714288e-06,
286
- "loss": 0.1357,
287
  "step": 150
288
  },
289
  {
290
  "epoch": 22.0,
291
- "eval_f1": 0.9620253164556962,
292
- "eval_loss": 0.12830054759979248,
293
- "eval_runtime": 9.7367,
294
- "eval_samples_per_second": 8.114,
295
- "eval_steps_per_second": 0.103,
296
  "step": 154
297
  },
298
  {
299
  "epoch": 22.86,
300
  "learning_rate": 8.571428571428571e-06,
301
- "loss": 0.147,
302
  "step": 160
303
  },
304
  {
305
  "epoch": 23.0,
306
  "eval_f1": 0.9493670886075949,
307
- "eval_loss": 0.12891362607479095,
308
- "eval_runtime": 9.7565,
309
- "eval_samples_per_second": 8.097,
310
- "eval_steps_per_second": 0.102,
311
  "step": 161
312
  },
313
  {
314
  "epoch": 24.0,
315
  "eval_f1": 0.9746835443037974,
316
- "eval_loss": 0.1338558942079544,
317
- "eval_runtime": 9.7379,
318
- "eval_samples_per_second": 8.113,
319
- "eval_steps_per_second": 0.103,
320
  "step": 168
321
  },
322
  {
323
  "epoch": 24.29,
324
  "learning_rate": 7.857142857142858e-06,
325
- "loss": 0.1388,
326
  "step": 170
327
  },
328
  {
329
  "epoch": 25.0,
330
  "eval_f1": 0.9493670886075949,
331
- "eval_loss": 0.12436553090810776,
332
- "eval_runtime": 9.7614,
333
- "eval_samples_per_second": 8.093,
334
- "eval_steps_per_second": 0.102,
335
  "step": 175
336
  },
337
  {
338
  "epoch": 25.71,
339
  "learning_rate": 7.1428571428571436e-06,
340
- "loss": 0.1192,
341
  "step": 180
342
  },
343
  {
344
  "epoch": 26.0,
345
- "eval_f1": 0.9746835443037974,
346
- "eval_loss": 0.11165592074394226,
347
- "eval_runtime": 9.7177,
348
- "eval_samples_per_second": 8.129,
349
- "eval_steps_per_second": 0.103,
350
  "step": 182
351
  },
352
  {
353
  "epoch": 27.0,
354
- "eval_f1": 0.9873417721518988,
355
- "eval_loss": 0.11046960204839706,
356
- "eval_runtime": 9.7417,
357
- "eval_samples_per_second": 8.109,
358
- "eval_steps_per_second": 0.103,
359
  "step": 189
360
  },
361
  {
362
  "epoch": 27.14,
363
  "learning_rate": 6.4285714285714295e-06,
364
- "loss": 0.112,
365
  "step": 190
366
  },
367
  {
368
  "epoch": 28.0,
369
- "eval_f1": 0.9746835443037974,
370
- "eval_loss": 0.1078834980726242,
371
- "eval_runtime": 9.7562,
372
- "eval_samples_per_second": 8.097,
373
- "eval_steps_per_second": 0.102,
374
  "step": 196
375
  },
376
  {
377
  "epoch": 28.57,
378
  "learning_rate": 5.7142857142857145e-06,
379
- "loss": 0.1215,
380
  "step": 200
381
  },
382
  {
383
  "epoch": 29.0,
384
- "eval_f1": 0.9620253164556962,
385
- "eval_loss": 0.11511888355016708,
386
- "eval_runtime": 9.7361,
387
- "eval_samples_per_second": 8.114,
388
- "eval_steps_per_second": 0.103,
389
  "step": 203
390
  },
391
  {
392
  "epoch": 30.0,
393
  "learning_rate": 5e-06,
394
- "loss": 0.1139,
395
  "step": 210
396
  },
397
  {
398
  "epoch": 30.0,
399
- "eval_f1": 0.9873417721518988,
400
- "eval_loss": 0.10075395554304123,
401
- "eval_runtime": 9.7351,
402
- "eval_samples_per_second": 8.115,
403
- "eval_steps_per_second": 0.103,
404
  "step": 210
405
  },
406
  {
407
  "epoch": 31.0,
408
- "eval_f1": 0.9746835443037974,
409
- "eval_loss": 0.10330603271722794,
410
- "eval_runtime": 9.7699,
411
- "eval_samples_per_second": 8.086,
412
- "eval_steps_per_second": 0.102,
413
  "step": 217
414
  },
415
  {
416
  "epoch": 31.43,
417
  "learning_rate": 4.2857142857142855e-06,
418
- "loss": 0.1164,
419
  "step": 220
420
  },
421
  {
422
  "epoch": 32.0,
423
- "eval_f1": 0.9873417721518988,
424
- "eval_loss": 0.0984945222735405,
425
- "eval_runtime": 9.7232,
426
- "eval_samples_per_second": 8.125,
427
- "eval_steps_per_second": 0.103,
428
  "step": 224
429
  },
430
  {
431
  "epoch": 32.86,
432
  "learning_rate": 3.5714285714285718e-06,
433
- "loss": 0.1192,
434
  "step": 230
435
  },
436
  {
437
  "epoch": 33.0,
438
- "eval_f1": 0.9873417721518988,
439
- "eval_loss": 0.09554588049650192,
440
- "eval_runtime": 9.7164,
441
- "eval_samples_per_second": 8.131,
442
- "eval_steps_per_second": 0.103,
443
  "step": 231
444
  },
445
  {
446
  "epoch": 34.0,
447
- "eval_f1": 0.9620253164556962,
448
- "eval_loss": 0.10772588849067688,
449
- "eval_runtime": 9.7297,
450
- "eval_samples_per_second": 8.119,
451
- "eval_steps_per_second": 0.103,
452
  "step": 238
453
  },
454
  {
455
  "epoch": 34.29,
456
  "learning_rate": 2.8571428571428573e-06,
457
- "loss": 0.1132,
458
  "step": 240
459
  },
460
  {
461
  "epoch": 35.0,
462
- "eval_f1": 0.9620253164556962,
463
- "eval_loss": 0.11074268072843552,
464
- "eval_runtime": 9.7358,
465
- "eval_samples_per_second": 8.114,
466
- "eval_steps_per_second": 0.103,
467
  "step": 245
468
  },
469
  {
470
  "epoch": 35.71,
471
  "learning_rate": 2.1428571428571427e-06,
472
- "loss": 0.1021,
473
  "step": 250
474
  },
475
  {
476
  "epoch": 36.0,
477
- "eval_f1": 0.9873417721518988,
478
- "eval_loss": 0.09575933963060379,
479
- "eval_runtime": 9.7306,
480
- "eval_samples_per_second": 8.119,
481
- "eval_steps_per_second": 0.103,
482
  "step": 252
483
  },
484
  {
485
  "epoch": 37.0,
486
- "eval_f1": 0.9873417721518988,
487
- "eval_loss": 0.09573517739772797,
488
- "eval_runtime": 9.7256,
489
- "eval_samples_per_second": 8.123,
490
- "eval_steps_per_second": 0.103,
491
  "step": 259
492
  },
493
  {
494
  "epoch": 37.14,
495
  "learning_rate": 1.4285714285714286e-06,
496
- "loss": 0.0945,
497
  "step": 260
498
  },
499
  {
500
  "epoch": 38.0,
501
- "eval_f1": 0.9746835443037974,
502
- "eval_loss": 0.09509044885635376,
503
- "eval_runtime": 9.7255,
504
- "eval_samples_per_second": 8.123,
505
- "eval_steps_per_second": 0.103,
506
  "step": 266
507
  },
508
  {
509
  "epoch": 38.57,
510
  "learning_rate": 7.142857142857143e-07,
511
- "loss": 0.1244,
512
  "step": 270
513
  },
514
  {
515
  "epoch": 39.0,
516
- "eval_f1": 0.9746835443037974,
517
- "eval_loss": 0.09489229321479797,
518
- "eval_runtime": 9.7321,
519
- "eval_samples_per_second": 8.117,
520
- "eval_steps_per_second": 0.103,
521
  "step": 273
522
  },
523
  {
524
  "epoch": 40.0,
525
  "learning_rate": 0.0,
526
- "loss": 0.1012,
527
  "step": 280
528
  },
529
  {
530
  "epoch": 40.0,
531
- "eval_f1": 0.9873417721518988,
532
- "eval_loss": 0.09554900228977203,
533
- "eval_runtime": 9.7342,
534
- "eval_samples_per_second": 8.116,
535
- "eval_steps_per_second": 0.103,
536
  "step": 280
537
  },
538
  {
539
  "epoch": 40.0,
540
  "step": 280,
541
  "total_flos": 1.4091487038849024e+18,
542
- "train_loss": 0.17715629466942379,
543
- "train_runtime": 2286.0002,
544
- "train_samples_per_second": 7.804,
545
- "train_steps_per_second": 0.122
546
  }
547
  ],
548
  "max_steps": 280,
 
1
  {
2
+ "best_metric": 0.06278952211141586,
3
+ "best_model_checkpoint": "/vit-base-patch32-224-in21k/checkpoint-217",
4
  "epoch": 40.0,
5
  "global_step": 280,
6
  "is_hyper_param_search": false,
 
10
  {
11
  "epoch": 1.0,
12
  "eval_f1": 0.8607594936708861,
13
+ "eval_loss": 0.4529457688331604,
14
+ "eval_runtime": 8.6926,
15
+ "eval_samples_per_second": 9.088,
16
+ "eval_steps_per_second": 0.115,
17
  "step": 7
18
  },
19
  {
20
  "epoch": 1.43,
21
  "learning_rate": 1.928571428571429e-05,
22
+ "loss": 0.5024,
23
  "step": 10
24
  },
25
  {
26
  "epoch": 2.0,
27
  "eval_f1": 0.8607594936708861,
28
+ "eval_loss": 0.39888718724250793,
29
+ "eval_runtime": 9.0793,
30
+ "eval_samples_per_second": 8.701,
31
+ "eval_steps_per_second": 0.11,
32
  "step": 14
33
  },
34
  {
35
  "epoch": 2.86,
36
  "learning_rate": 1.8571428571428575e-05,
37
+ "loss": 0.3533,
38
  "step": 20
39
  },
40
  {
41
  "epoch": 3.0,
42
  "eval_f1": 0.8607594936708861,
43
+ "eval_loss": 0.37408992648124695,
44
+ "eval_runtime": 9.0097,
45
+ "eval_samples_per_second": 8.768,
46
+ "eval_steps_per_second": 0.111,
47
  "step": 21
48
  },
49
  {
50
  "epoch": 4.0,
51
  "eval_f1": 0.8607594936708861,
52
+ "eval_loss": 0.3160648047924042,
53
+ "eval_runtime": 9.1428,
54
+ "eval_samples_per_second": 8.641,
55
+ "eval_steps_per_second": 0.109,
56
  "step": 28
57
  },
58
  {
59
  "epoch": 4.29,
60
  "learning_rate": 1.785714285714286e-05,
61
+ "loss": 0.285,
62
  "step": 30
63
  },
64
  {
65
  "epoch": 5.0,
66
  "eval_f1": 0.8607594936708861,
67
+ "eval_loss": 0.282362163066864,
68
+ "eval_runtime": 9.1103,
69
+ "eval_samples_per_second": 8.671,
70
+ "eval_steps_per_second": 0.11,
71
  "step": 35
72
  },
73
  {
74
  "epoch": 5.71,
75
  "learning_rate": 1.7142857142857142e-05,
76
+ "loss": 0.2491,
77
  "step": 40
78
  },
79
  {
80
  "epoch": 6.0,
81
  "eval_f1": 0.8607594936708861,
82
+ "eval_loss": 0.2700817883014679,
83
+ "eval_runtime": 8.8127,
84
+ "eval_samples_per_second": 8.964,
85
+ "eval_steps_per_second": 0.113,
86
  "step": 42
87
  },
88
  {
89
  "epoch": 7.0,
90
+ "eval_f1": 0.9113924050632911,
91
+ "eval_loss": 0.2062235176563263,
92
+ "eval_runtime": 8.5646,
93
+ "eval_samples_per_second": 9.224,
94
+ "eval_steps_per_second": 0.117,
95
  "step": 49
96
  },
97
  {
98
  "epoch": 7.14,
99
  "learning_rate": 1.642857142857143e-05,
100
+ "loss": 0.2032,
101
  "step": 50
102
  },
103
  {
104
  "epoch": 8.0,
105
+ "eval_f1": 0.9493670886075949,
106
+ "eval_loss": 0.20497918128967285,
107
+ "eval_runtime": 8.6022,
108
+ "eval_samples_per_second": 9.184,
109
+ "eval_steps_per_second": 0.116,
110
  "step": 56
111
  },
112
  {
113
  "epoch": 8.57,
114
  "learning_rate": 1.5714285714285715e-05,
115
+ "loss": 0.157,
116
  "step": 60
117
  },
118
  {
119
  "epoch": 9.0,
120
+ "eval_f1": 0.9493670886075949,
121
+ "eval_loss": 0.20131482183933258,
122
+ "eval_runtime": 8.5286,
123
+ "eval_samples_per_second": 9.263,
124
+ "eval_steps_per_second": 0.117,
125
  "step": 63
126
  },
127
  {
128
  "epoch": 10.0,
129
  "learning_rate": 1.5000000000000002e-05,
130
+ "loss": 0.1127,
131
  "step": 70
132
  },
133
  {
134
  "epoch": 10.0,
135
+ "eval_f1": 0.9367088607594937,
136
+ "eval_loss": 0.19601519405841827,
137
+ "eval_runtime": 8.52,
138
+ "eval_samples_per_second": 9.272,
139
+ "eval_steps_per_second": 0.117,
140
  "step": 70
141
  },
142
  {
143
  "epoch": 11.0,
144
+ "eval_f1": 0.9493670886075949,
145
+ "eval_loss": 0.14171478152275085,
146
+ "eval_runtime": 8.4584,
147
+ "eval_samples_per_second": 9.34,
148
+ "eval_steps_per_second": 0.118,
149
  "step": 77
150
  },
151
  {
152
  "epoch": 11.43,
153
  "learning_rate": 1.4285714285714287e-05,
154
+ "loss": 0.0903,
155
  "step": 80
156
  },
157
  {
158
  "epoch": 12.0,
159
+ "eval_f1": 0.9493670886075949,
160
+ "eval_loss": 0.1306915283203125,
161
+ "eval_runtime": 8.4328,
162
+ "eval_samples_per_second": 9.368,
163
+ "eval_steps_per_second": 0.119,
164
  "step": 84
165
  },
166
  {
167
  "epoch": 12.86,
168
  "learning_rate": 1.3571428571428574e-05,
169
+ "loss": 0.0922,
170
  "step": 90
171
  },
172
  {
173
  "epoch": 13.0,
174
+ "eval_f1": 0.9873417721518988,
175
+ "eval_loss": 0.08702569454908371,
176
+ "eval_runtime": 8.8303,
177
+ "eval_samples_per_second": 8.947,
178
+ "eval_steps_per_second": 0.113,
179
  "step": 91
180
  },
181
  {
182
  "epoch": 14.0,
183
+ "eval_f1": 0.9240506329113924,
184
+ "eval_loss": 0.20480988919734955,
185
+ "eval_runtime": 8.6551,
186
+ "eval_samples_per_second": 9.128,
187
+ "eval_steps_per_second": 0.116,
188
  "step": 98
189
  },
190
  {
191
  "epoch": 14.29,
192
  "learning_rate": 1.2857142857142859e-05,
193
+ "loss": 0.0595,
194
  "step": 100
195
  },
196
  {
197
  "epoch": 15.0,
198
+ "eval_f1": 0.9620253164556962,
199
+ "eval_loss": 0.12036000937223434,
200
+ "eval_runtime": 8.6389,
201
+ "eval_samples_per_second": 9.145,
202
+ "eval_steps_per_second": 0.116,
203
  "step": 105
204
  },
205
  {
206
  "epoch": 15.71,
207
  "learning_rate": 1.2142857142857142e-05,
208
+ "loss": 0.0527,
209
  "step": 110
210
  },
211
  {
212
  "epoch": 16.0,
213
+ "eval_f1": 0.9367088607594937,
214
+ "eval_loss": 0.2552852928638458,
215
+ "eval_runtime": 8.4875,
216
+ "eval_samples_per_second": 9.308,
217
+ "eval_steps_per_second": 0.118,
218
  "step": 112
219
  },
220
  {
221
  "epoch": 17.0,
222
+ "eval_f1": 0.9367088607594937,
223
+ "eval_loss": 0.16753825545310974,
224
+ "eval_runtime": 8.4252,
225
+ "eval_samples_per_second": 9.377,
226
+ "eval_steps_per_second": 0.119,
227
  "step": 119
228
  },
229
  {
230
  "epoch": 17.14,
231
  "learning_rate": 1.1428571428571429e-05,
232
+ "loss": 0.0477,
233
  "step": 120
234
  },
235
  {
236
  "epoch": 18.0,
237
+ "eval_f1": 0.9240506329113924,
238
+ "eval_loss": 0.22650040686130524,
239
+ "eval_runtime": 8.392,
240
+ "eval_samples_per_second": 9.414,
241
+ "eval_steps_per_second": 0.119,
242
  "step": 126
243
  },
244
  {
245
  "epoch": 18.57,
246
  "learning_rate": 1.0714285714285714e-05,
247
+ "loss": 0.0411,
248
  "step": 130
249
  },
250
  {
251
  "epoch": 19.0,
252
+ "eval_f1": 0.9367088607594937,
253
+ "eval_loss": 0.1900627613067627,
254
+ "eval_runtime": 8.4937,
255
+ "eval_samples_per_second": 9.301,
256
+ "eval_steps_per_second": 0.118,
257
  "step": 133
258
  },
259
  {
260
  "epoch": 20.0,
261
  "learning_rate": 1e-05,
262
+ "loss": 0.0299,
263
  "step": 140
264
  },
265
  {
266
  "epoch": 20.0,
267
+ "eval_f1": 0.9240506329113924,
268
+ "eval_loss": 0.2422873079776764,
269
+ "eval_runtime": 8.7448,
270
+ "eval_samples_per_second": 9.034,
271
+ "eval_steps_per_second": 0.114,
272
  "step": 140
273
  },
274
  {
275
  "epoch": 21.0,
276
+ "eval_f1": 0.9873417721518988,
277
+ "eval_loss": 0.06394638121128082,
278
+ "eval_runtime": 8.7204,
279
+ "eval_samples_per_second": 9.059,
280
+ "eval_steps_per_second": 0.115,
281
  "step": 147
282
  },
283
  {
284
  "epoch": 21.43,
285
  "learning_rate": 9.285714285714288e-06,
286
+ "loss": 0.0487,
287
  "step": 150
288
  },
289
  {
290
  "epoch": 22.0,
291
+ "eval_f1": 0.9493670886075949,
292
+ "eval_loss": 0.12548162043094635,
293
+ "eval_runtime": 8.9171,
294
+ "eval_samples_per_second": 8.859,
295
+ "eval_steps_per_second": 0.112,
296
  "step": 154
297
  },
298
  {
299
  "epoch": 22.86,
300
  "learning_rate": 8.571428571428571e-06,
301
+ "loss": 0.0359,
302
  "step": 160
303
  },
304
  {
305
  "epoch": 23.0,
306
  "eval_f1": 0.9493670886075949,
307
+ "eval_loss": 0.12127607315778732,
308
+ "eval_runtime": 8.6147,
309
+ "eval_samples_per_second": 9.17,
310
+ "eval_steps_per_second": 0.116,
311
  "step": 161
312
  },
313
  {
314
  "epoch": 24.0,
315
  "eval_f1": 0.9746835443037974,
316
+ "eval_loss": 0.07274330407381058,
317
+ "eval_runtime": 8.5807,
318
+ "eval_samples_per_second": 9.207,
319
+ "eval_steps_per_second": 0.117,
320
  "step": 168
321
  },
322
  {
323
  "epoch": 24.29,
324
  "learning_rate": 7.857142857142858e-06,
325
+ "loss": 0.0302,
326
  "step": 170
327
  },
328
  {
329
  "epoch": 25.0,
330
  "eval_f1": 0.9493670886075949,
331
+ "eval_loss": 0.11162865161895752,
332
+ "eval_runtime": 8.6735,
333
+ "eval_samples_per_second": 9.108,
334
+ "eval_steps_per_second": 0.115,
335
  "step": 175
336
  },
337
  {
338
  "epoch": 25.71,
339
  "learning_rate": 7.1428571428571436e-06,
340
+ "loss": 0.0304,
341
  "step": 180
342
  },
343
  {
344
  "epoch": 26.0,
345
+ "eval_f1": 0.9493670886075949,
346
+ "eval_loss": 0.10622164607048035,
347
+ "eval_runtime": 8.8211,
348
+ "eval_samples_per_second": 8.956,
349
+ "eval_steps_per_second": 0.113,
350
  "step": 182
351
  },
352
  {
353
  "epoch": 27.0,
354
+ "eval_f1": 0.9240506329113924,
355
+ "eval_loss": 0.20966486632823944,
356
+ "eval_runtime": 8.7082,
357
+ "eval_samples_per_second": 9.072,
358
+ "eval_steps_per_second": 0.115,
359
  "step": 189
360
  },
361
  {
362
  "epoch": 27.14,
363
  "learning_rate": 6.4285714285714295e-06,
364
+ "loss": 0.0274,
365
  "step": 190
366
  },
367
  {
368
  "epoch": 28.0,
369
+ "eval_f1": 0.9493670886075949,
370
+ "eval_loss": 0.1276017129421234,
371
+ "eval_runtime": 8.676,
372
+ "eval_samples_per_second": 9.106,
373
+ "eval_steps_per_second": 0.115,
374
  "step": 196
375
  },
376
  {
377
  "epoch": 28.57,
378
  "learning_rate": 5.7142857142857145e-06,
379
+ "loss": 0.0291,
380
  "step": 200
381
  },
382
  {
383
  "epoch": 29.0,
384
+ "eval_f1": 0.9493670886075949,
385
+ "eval_loss": 0.09670199453830719,
386
+ "eval_runtime": 8.5086,
387
+ "eval_samples_per_second": 9.285,
388
+ "eval_steps_per_second": 0.118,
389
  "step": 203
390
  },
391
  {
392
  "epoch": 30.0,
393
  "learning_rate": 5e-06,
394
+ "loss": 0.0202,
395
  "step": 210
396
  },
397
  {
398
  "epoch": 30.0,
399
+ "eval_f1": 0.9746835443037974,
400
+ "eval_loss": 0.07649976760149002,
401
+ "eval_runtime": 8.9676,
402
+ "eval_samples_per_second": 8.81,
403
+ "eval_steps_per_second": 0.112,
404
  "step": 210
405
  },
406
  {
407
  "epoch": 31.0,
408
+ "eval_f1": 0.9873417721518988,
409
+ "eval_loss": 0.06278952211141586,
410
+ "eval_runtime": 8.7232,
411
+ "eval_samples_per_second": 9.056,
412
+ "eval_steps_per_second": 0.115,
413
  "step": 217
414
  },
415
  {
416
  "epoch": 31.43,
417
  "learning_rate": 4.2857142857142855e-06,
418
+ "loss": 0.0232,
419
  "step": 220
420
  },
421
  {
422
  "epoch": 32.0,
423
+ "eval_f1": 0.9493670886075949,
424
+ "eval_loss": 0.13882263004779816,
425
+ "eval_runtime": 8.6516,
426
+ "eval_samples_per_second": 9.131,
427
+ "eval_steps_per_second": 0.116,
428
  "step": 224
429
  },
430
  {
431
  "epoch": 32.86,
432
  "learning_rate": 3.5714285714285718e-06,
433
+ "loss": 0.0264,
434
  "step": 230
435
  },
436
  {
437
  "epoch": 33.0,
438
+ "eval_f1": 0.9493670886075949,
439
+ "eval_loss": 0.10616844147443771,
440
+ "eval_runtime": 8.6952,
441
+ "eval_samples_per_second": 9.085,
442
+ "eval_steps_per_second": 0.115,
443
  "step": 231
444
  },
445
  {
446
  "epoch": 34.0,
447
+ "eval_f1": 0.9493670886075949,
448
+ "eval_loss": 0.1320488601922989,
449
+ "eval_runtime": 8.7896,
450
+ "eval_samples_per_second": 8.988,
451
+ "eval_steps_per_second": 0.114,
452
  "step": 238
453
  },
454
  {
455
  "epoch": 34.29,
456
  "learning_rate": 2.8571428571428573e-06,
457
+ "loss": 0.0219,
458
  "step": 240
459
  },
460
  {
461
  "epoch": 35.0,
462
+ "eval_f1": 0.9493670886075949,
463
+ "eval_loss": 0.1528194099664688,
464
+ "eval_runtime": 8.8303,
465
+ "eval_samples_per_second": 8.946,
466
+ "eval_steps_per_second": 0.113,
467
  "step": 245
468
  },
469
  {
470
  "epoch": 35.71,
471
  "learning_rate": 2.1428571428571427e-06,
472
+ "loss": 0.0194,
473
  "step": 250
474
  },
475
  {
476
  "epoch": 36.0,
477
+ "eval_f1": 0.9493670886075949,
478
+ "eval_loss": 0.1746273934841156,
479
+ "eval_runtime": 8.7909,
480
+ "eval_samples_per_second": 8.987,
481
+ "eval_steps_per_second": 0.114,
482
  "step": 252
483
  },
484
  {
485
  "epoch": 37.0,
486
+ "eval_f1": 0.9493670886075949,
487
+ "eval_loss": 0.16089513897895813,
488
+ "eval_runtime": 8.8367,
489
+ "eval_samples_per_second": 8.94,
490
+ "eval_steps_per_second": 0.113,
491
  "step": 259
492
  },
493
  {
494
  "epoch": 37.14,
495
  "learning_rate": 1.4285714285714286e-06,
496
+ "loss": 0.0204,
497
  "step": 260
498
  },
499
  {
500
  "epoch": 38.0,
501
+ "eval_f1": 0.9493670886075949,
502
+ "eval_loss": 0.14817634224891663,
503
+ "eval_runtime": 8.8819,
504
+ "eval_samples_per_second": 8.895,
505
+ "eval_steps_per_second": 0.113,
506
  "step": 266
507
  },
508
  {
509
  "epoch": 38.57,
510
  "learning_rate": 7.142857142857143e-07,
511
+ "loss": 0.0217,
512
  "step": 270
513
  },
514
  {
515
  "epoch": 39.0,
516
+ "eval_f1": 0.9493670886075949,
517
+ "eval_loss": 0.152223601937294,
518
+ "eval_runtime": 8.8958,
519
+ "eval_samples_per_second": 8.881,
520
+ "eval_steps_per_second": 0.112,
521
  "step": 273
522
  },
523
  {
524
  "epoch": 40.0,
525
  "learning_rate": 0.0,
526
+ "loss": 0.0216,
527
  "step": 280
528
  },
529
  {
530
  "epoch": 40.0,
531
+ "eval_f1": 0.9493670886075949,
532
+ "eval_loss": 0.14991530776023865,
533
+ "eval_runtime": 8.6557,
534
+ "eval_samples_per_second": 9.127,
535
+ "eval_steps_per_second": 0.116,
536
  "step": 280
537
  },
538
  {
539
  "epoch": 40.0,
540
  "step": 280,
541
  "total_flos": 1.4091487038849024e+18,
542
+ "train_loss": 0.09473916946777276,
543
+ "train_runtime": 2533.8499,
544
+ "train_samples_per_second": 7.041,
545
+ "train_steps_per_second": 0.111
546
  }
547
  ],
548
  "max_steps": 280,