Augusto777 commited on
Commit
7a75384
1 Parent(s): 9108cc3

End of training

Browse files
README.md CHANGED
@@ -22,7 +22,7 @@ model-index:
22
  metrics:
23
  - name: Accuracy
24
  type: accuracy
25
- value: 0.8043478260869565
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -32,8 +32,8 @@ should probably proofread and complete it, then remove this comment. -->
32
 
33
  This model is a fine-tuned version of [google/vit-base-patch16-224](https://huggingface.co/google/vit-base-patch16-224) on the imagefolder dataset.
34
  It achieves the following results on the evaluation set:
35
- - Loss: 0.8551
36
- - Accuracy: 0.8043
37
 
38
  ## Model description
39
 
 
22
  metrics:
23
  - name: Accuracy
24
  type: accuracy
25
+ value: 0.8478260869565217
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
32
 
33
  This model is a fine-tuned version of [google/vit-base-patch16-224](https://huggingface.co/google/vit-base-patch16-224) on the imagefolder dataset.
34
  It achieves the following results on the evaluation set:
35
+ - Loss: 0.6099
36
+ - Accuracy: 0.8478
37
 
38
  ## Model description
39
 
all_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
- "epoch": 38.83,
3
- "eval_accuracy": 0.8695652173913043,
4
- "eval_loss": 0.6455863118171692,
5
- "eval_runtime": 0.9822,
6
- "eval_samples_per_second": 46.833,
7
- "eval_steps_per_second": 6.109,
8
- "train_loss": 0.2159135382026434,
9
- "train_runtime": 637.3873,
10
- "train_samples_per_second": 51.397,
11
- "train_steps_per_second": 1.569
12
  }
 
1
  {
2
+ "epoch": 39.61,
3
+ "eval_accuracy": 0.8478260869565217,
4
+ "eval_loss": 0.6099294424057007,
5
+ "eval_runtime": 1.0793,
6
+ "eval_samples_per_second": 42.622,
7
+ "eval_steps_per_second": 5.559,
8
+ "train_loss": 0.24349691933568787,
9
+ "train_runtime": 652.5132,
10
+ "train_samples_per_second": 50.206,
11
+ "train_steps_per_second": 1.042
12
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 38.83,
3
- "eval_accuracy": 0.8695652173913043,
4
- "eval_loss": 0.6455863118171692,
5
- "eval_runtime": 0.9822,
6
- "eval_samples_per_second": 46.833,
7
- "eval_steps_per_second": 6.109
8
  }
 
1
  {
2
+ "epoch": 39.61,
3
+ "eval_accuracy": 0.8478260869565217,
4
+ "eval_loss": 0.6099294424057007,
5
+ "eval_runtime": 1.0793,
6
+ "eval_samples_per_second": 42.622,
7
+ "eval_steps_per_second": 5.559
8
  }
runs/Jun23_14-57-58_DESKTOP-SKBE9FB/events.out.tfevents.1719176973.DESKTOP-SKBE9FB.17456.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a197d0916e9ed1086d3d3007de0726cec9a95b9910e8eb6ca9114eb1c617f109
3
+ size 411
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 38.83,
3
- "train_loss": 0.2159135382026434,
4
- "train_runtime": 637.3873,
5
- "train_samples_per_second": 51.397,
6
- "train_steps_per_second": 1.569
7
  }
 
1
  {
2
+ "epoch": 39.61,
3
+ "train_loss": 0.24349691933568787,
4
+ "train_runtime": 652.5132,
5
+ "train_samples_per_second": 50.206,
6
+ "train_steps_per_second": 1.042
7
  }
trainer_state.json CHANGED
@@ -1,980 +1,797 @@
1
  {
2
- "best_metric": 0.8695652173913043,
3
- "best_model_checkpoint": "vit-base-patch16-224-ve-U13b-80RX\\checkpoint-540",
4
- "epoch": 38.83495145631068,
5
  "eval_steps": 500,
6
- "global_step": 1000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.39,
13
- "learning_rate": 1.1000000000000001e-05,
14
- "loss": 1.386,
15
  "step": 10
16
  },
17
  {
18
- "epoch": 0.78,
19
- "learning_rate": 2.2000000000000003e-05,
20
- "loss": 1.3805,
21
- "step": 20
22
- },
23
- {
24
- "epoch": 0.97,
25
- "eval_accuracy": 0.5,
26
- "eval_loss": 1.3566619157791138,
27
- "eval_runtime": 0.9332,
28
- "eval_samples_per_second": 49.292,
29
- "eval_steps_per_second": 6.429,
30
- "step": 25
31
  },
32
  {
33
  "epoch": 1.17,
34
- "learning_rate": 3.3e-05,
35
- "loss": 1.3523,
36
- "step": 30
37
- },
38
- {
39
- "epoch": 1.55,
40
- "learning_rate": 4.4000000000000006e-05,
41
- "loss": 1.283,
42
- "step": 40
43
  },
44
  {
45
- "epoch": 1.94,
46
- "learning_rate": 5.5e-05,
47
- "loss": 1.1576,
48
- "step": 50
49
  },
50
  {
51
  "epoch": 1.98,
52
- "eval_accuracy": 0.43478260869565216,
53
- "eval_loss": 1.1359825134277344,
54
- "eval_runtime": 0.7817,
55
- "eval_samples_per_second": 58.846,
56
- "eval_steps_per_second": 7.676,
57
- "step": 51
58
  },
59
  {
60
  "epoch": 2.33,
61
- "learning_rate": 5.4421052631578945e-05,
62
- "loss": 1.0631,
63
- "step": 60
64
- },
65
- {
66
- "epoch": 2.72,
67
- "learning_rate": 5.38421052631579e-05,
68
- "loss": 0.9331,
69
- "step": 70
70
  },
71
  {
72
- "epoch": 2.99,
73
- "eval_accuracy": 0.8043478260869565,
74
- "eval_loss": 0.8530930876731873,
75
- "eval_runtime": 0.8277,
76
- "eval_samples_per_second": 55.576,
77
- "eval_steps_per_second": 7.249,
78
- "step": 77
79
  },
80
  {
81
- "epoch": 3.11,
82
- "learning_rate": 5.3263157894736844e-05,
83
- "loss": 0.8206,
84
- "step": 80
 
 
 
85
  },
86
  {
87
  "epoch": 3.5,
88
- "learning_rate": 5.268421052631579e-05,
89
- "loss": 0.7139,
90
- "step": 90
91
- },
92
- {
93
- "epoch": 3.88,
94
- "learning_rate": 5.210526315789474e-05,
95
- "loss": 0.6657,
96
- "step": 100
97
  },
98
  {
99
- "epoch": 4.0,
100
  "eval_accuracy": 0.782608695652174,
101
- "eval_loss": 0.6856333613395691,
102
- "eval_runtime": 0.7852,
103
- "eval_samples_per_second": 58.586,
104
- "eval_steps_per_second": 7.642,
105
- "step": 103
106
  },
107
  {
108
- "epoch": 4.27,
109
- "learning_rate": 5.152631578947369e-05,
110
- "loss": 0.6225,
111
- "step": 110
112
  },
113
  {
114
  "epoch": 4.66,
115
- "learning_rate": 5.0947368421052636e-05,
116
- "loss": 0.5642,
117
- "step": 120
118
  },
119
  {
120
- "epoch": 4.97,
121
  "eval_accuracy": 0.782608695652174,
122
- "eval_loss": 0.6162217855453491,
123
- "eval_runtime": 0.8007,
124
- "eval_samples_per_second": 57.45,
125
- "eval_steps_per_second": 7.494,
126
- "step": 128
127
- },
128
- {
129
- "epoch": 5.05,
130
- "learning_rate": 5.036842105263158e-05,
131
- "loss": 0.4835,
132
- "step": 130
133
  },
134
  {
135
- "epoch": 5.44,
136
- "learning_rate": 4.978947368421053e-05,
137
- "loss": 0.4062,
138
- "step": 140
139
  },
140
  {
141
  "epoch": 5.83,
142
- "learning_rate": 4.921052631578948e-05,
143
- "loss": 0.3632,
144
- "step": 150
145
- },
146
- {
147
- "epoch": 5.98,
148
- "eval_accuracy": 0.8043478260869565,
149
- "eval_loss": 0.590190589427948,
150
- "eval_runtime": 0.7922,
151
- "eval_samples_per_second": 58.068,
152
- "eval_steps_per_second": 7.574,
153
- "step": 154
154
  },
155
  {
156
- "epoch": 6.21,
157
- "learning_rate": 4.863157894736842e-05,
158
- "loss": 0.3261,
159
- "step": 160
 
 
 
160
  },
161
  {
162
- "epoch": 6.6,
163
- "learning_rate": 4.805263157894737e-05,
164
- "loss": 0.2892,
165
- "step": 170
166
  },
167
  {
168
  "epoch": 6.99,
169
- "learning_rate": 4.747368421052632e-05,
170
- "loss": 0.3384,
171
- "step": 180
172
  },
173
  {
174
  "epoch": 6.99,
175
- "eval_accuracy": 0.8043478260869565,
176
- "eval_loss": 0.4995403587818146,
177
- "eval_runtime": 0.7967,
178
- "eval_samples_per_second": 57.739,
179
- "eval_steps_per_second": 7.531,
180
- "step": 180
181
- },
182
- {
183
- "epoch": 7.38,
184
- "learning_rate": 4.6894736842105264e-05,
185
- "loss": 0.2762,
186
- "step": 190
187
  },
188
  {
189
- "epoch": 7.77,
190
- "learning_rate": 4.631578947368421e-05,
191
- "loss": 0.2261,
192
- "step": 200
193
  },
194
  {
195
- "epoch": 8.0,
196
- "eval_accuracy": 0.7608695652173914,
197
- "eval_loss": 0.6854467988014221,
198
- "eval_runtime": 0.7832,
199
- "eval_samples_per_second": 58.734,
200
- "eval_steps_per_second": 7.661,
201
- "step": 206
202
  },
203
  {
204
  "epoch": 8.16,
205
- "learning_rate": 4.573684210526316e-05,
206
- "loss": 0.259,
207
- "step": 210
208
- },
209
- {
210
- "epoch": 8.54,
211
- "learning_rate": 4.5157894736842106e-05,
212
- "loss": 0.1924,
213
- "step": 220
214
  },
215
  {
216
- "epoch": 8.93,
217
- "learning_rate": 4.457894736842105e-05,
218
- "loss": 0.2066,
219
- "step": 230
220
  },
221
  {
222
  "epoch": 8.97,
223
- "eval_accuracy": 0.782608695652174,
224
- "eval_loss": 0.5605289936065674,
225
- "eval_runtime": 1.1543,
226
- "eval_samples_per_second": 39.852,
227
- "eval_steps_per_second": 5.198,
228
- "step": 231
229
  },
230
  {
231
  "epoch": 9.32,
232
- "learning_rate": 4.4000000000000006e-05,
233
- "loss": 0.1525,
234
- "step": 240
235
  },
236
  {
237
- "epoch": 9.71,
238
  "learning_rate": 4.342105263157895e-05,
239
- "loss": 0.1635,
240
- "step": 250
241
- },
242
- {
243
- "epoch": 9.98,
244
- "eval_accuracy": 0.7391304347826086,
245
- "eval_loss": 0.720944881439209,
246
- "eval_runtime": 0.7752,
247
- "eval_samples_per_second": 59.34,
248
- "eval_steps_per_second": 7.74,
249
- "step": 257
250
  },
251
  {
252
- "epoch": 10.1,
253
- "learning_rate": 4.28421052631579e-05,
254
- "loss": 0.1457,
255
- "step": 260
 
 
 
256
  },
257
  {
258
  "epoch": 10.49,
259
- "learning_rate": 4.226315789473684e-05,
260
- "loss": 0.1363,
261
- "step": 270
262
- },
263
- {
264
- "epoch": 10.87,
265
- "learning_rate": 4.168421052631579e-05,
266
- "loss": 0.1829,
267
- "step": 280
268
  },
269
  {
270
- "epoch": 10.99,
271
- "eval_accuracy": 0.6956521739130435,
272
- "eval_loss": 0.9292640089988708,
273
- "eval_runtime": 0.8392,
274
- "eval_samples_per_second": 54.814,
275
- "eval_steps_per_second": 7.15,
276
- "step": 283
277
  },
278
  {
279
- "epoch": 11.26,
280
- "learning_rate": 4.110526315789474e-05,
281
- "loss": 0.1439,
282
- "step": 290
283
  },
284
  {
285
  "epoch": 11.65,
286
- "learning_rate": 4.0526315789473684e-05,
287
- "loss": 0.1455,
288
- "step": 300
289
  },
290
  {
291
  "epoch": 12.0,
292
  "eval_accuracy": 0.782608695652174,
293
- "eval_loss": 0.5999487042427063,
294
- "eval_runtime": 0.7912,
295
- "eval_samples_per_second": 58.141,
296
- "eval_steps_per_second": 7.584,
297
- "step": 309
298
- },
299
- {
300
- "epoch": 12.04,
301
- "learning_rate": 3.994736842105263e-05,
302
- "loss": 0.1154,
303
- "step": 310
304
  },
305
  {
306
- "epoch": 12.43,
307
- "learning_rate": 3.936842105263158e-05,
308
- "loss": 0.1461,
309
- "step": 320
310
  },
311
  {
312
  "epoch": 12.82,
313
- "learning_rate": 3.8789473684210526e-05,
314
- "loss": 0.1072,
315
- "step": 330
316
- },
317
- {
318
- "epoch": 12.97,
319
- "eval_accuracy": 0.782608695652174,
320
- "eval_loss": 0.7919036149978638,
321
- "eval_runtime": 0.8292,
322
- "eval_samples_per_second": 55.476,
323
- "eval_steps_per_second": 7.236,
324
- "step": 334
325
  },
326
  {
327
- "epoch": 13.2,
328
- "learning_rate": 3.8210526315789476e-05,
329
- "loss": 0.1154,
330
- "step": 340
 
 
 
331
  },
332
  {
333
- "epoch": 13.59,
334
- "learning_rate": 3.7631578947368425e-05,
335
- "loss": 0.1484,
336
- "step": 350
337
  },
338
  {
339
  "epoch": 13.98,
340
- "learning_rate": 3.705263157894737e-05,
341
- "loss": 0.1059,
342
- "step": 360
343
  },
344
  {
345
  "epoch": 13.98,
346
  "eval_accuracy": 0.8043478260869565,
347
- "eval_loss": 0.7782349586486816,
348
- "eval_runtime": 0.7972,
349
- "eval_samples_per_second": 57.703,
350
- "eval_steps_per_second": 7.526,
351
- "step": 360
352
- },
353
- {
354
- "epoch": 14.37,
355
- "learning_rate": 3.647368421052631e-05,
356
- "loss": 0.0885,
357
- "step": 370
358
  },
359
  {
360
- "epoch": 14.76,
361
- "learning_rate": 3.589473684210527e-05,
362
- "loss": 0.0971,
363
- "step": 380
364
  },
365
  {
366
- "epoch": 14.99,
367
  "eval_accuracy": 0.8043478260869565,
368
- "eval_loss": 0.8248986005783081,
369
- "eval_runtime": 0.7922,
370
- "eval_samples_per_second": 58.067,
371
- "eval_steps_per_second": 7.574,
372
- "step": 386
373
  },
374
  {
375
  "epoch": 15.15,
376
- "learning_rate": 3.531578947368421e-05,
377
- "loss": 0.1269,
378
- "step": 390
379
- },
380
- {
381
- "epoch": 15.53,
382
- "learning_rate": 3.473684210526316e-05,
383
- "loss": 0.1137,
384
- "step": 400
385
  },
386
  {
387
- "epoch": 15.92,
388
- "learning_rate": 3.415789473684211e-05,
389
- "loss": 0.0456,
390
- "step": 410
391
  },
392
  {
393
- "epoch": 16.0,
394
- "eval_accuracy": 0.782608695652174,
395
- "eval_loss": 0.7964589595794678,
396
- "eval_runtime": 0.7832,
397
- "eval_samples_per_second": 58.735,
398
- "eval_steps_per_second": 7.661,
399
- "step": 412
400
  },
401
  {
402
  "epoch": 16.31,
403
- "learning_rate": 3.357894736842105e-05,
404
- "loss": 0.127,
405
- "step": 420
406
- },
407
- {
408
- "epoch": 16.7,
409
- "learning_rate": 3.3e-05,
410
- "loss": 0.0483,
411
- "step": 430
412
  },
413
  {
414
- "epoch": 16.97,
415
- "eval_accuracy": 0.8260869565217391,
416
- "eval_loss": 0.7163321375846863,
417
- "eval_runtime": 0.8357,
418
- "eval_samples_per_second": 55.044,
419
- "eval_steps_per_second": 7.18,
420
- "step": 437
421
  },
422
  {
423
- "epoch": 17.09,
424
- "learning_rate": 3.2421052631578945e-05,
425
- "loss": 0.1415,
426
- "step": 440
 
 
 
427
  },
428
  {
429
  "epoch": 17.48,
430
- "learning_rate": 3.1842105263157895e-05,
431
- "loss": 0.1081,
432
- "step": 450
433
- },
434
- {
435
- "epoch": 17.86,
436
- "learning_rate": 3.1263157894736845e-05,
437
- "loss": 0.0832,
438
- "step": 460
439
  },
440
  {
441
- "epoch": 17.98,
442
- "eval_accuracy": 0.782608695652174,
443
- "eval_loss": 0.8122026324272156,
444
- "eval_runtime": 0.7852,
445
- "eval_samples_per_second": 58.584,
446
- "eval_steps_per_second": 7.641,
447
- "step": 463
448
  },
449
  {
450
- "epoch": 18.25,
451
- "learning_rate": 3.068421052631579e-05,
452
- "loss": 0.0629,
453
- "step": 470
454
  },
455
  {
456
  "epoch": 18.64,
457
- "learning_rate": 3.010526315789474e-05,
458
- "loss": 0.055,
459
- "step": 480
460
  },
461
  {
462
  "epoch": 18.99,
463
- "eval_accuracy": 0.782608695652174,
464
- "eval_loss": 0.8250147104263306,
465
- "eval_runtime": 0.7762,
466
- "eval_samples_per_second": 59.264,
467
- "eval_steps_per_second": 7.73,
468
- "step": 489
469
- },
470
- {
471
- "epoch": 19.03,
472
- "learning_rate": 2.9526315789473684e-05,
473
- "loss": 0.1095,
474
- "step": 490
475
  },
476
  {
477
- "epoch": 19.42,
478
- "learning_rate": 2.894736842105263e-05,
479
- "loss": 0.07,
480
- "step": 500
481
  },
482
  {
483
  "epoch": 19.81,
484
- "learning_rate": 2.8368421052631583e-05,
485
- "loss": 0.0753,
486
- "step": 510
487
- },
488
- {
489
- "epoch": 20.0,
490
- "eval_accuracy": 0.8478260869565217,
491
- "eval_loss": 0.6865554451942444,
492
- "eval_runtime": 0.8217,
493
- "eval_samples_per_second": 55.981,
494
- "eval_steps_per_second": 7.302,
495
- "step": 515
496
  },
497
  {
498
- "epoch": 20.19,
499
- "learning_rate": 2.778947368421053e-05,
500
- "loss": 0.1031,
501
- "step": 520
 
 
 
502
  },
503
  {
504
- "epoch": 20.58,
505
- "learning_rate": 2.7210526315789472e-05,
506
- "loss": 0.0668,
507
- "step": 530
508
  },
509
  {
510
  "epoch": 20.97,
511
- "learning_rate": 2.6631578947368422e-05,
512
- "loss": 0.14,
513
- "step": 540
514
  },
515
  {
516
  "epoch": 20.97,
517
- "eval_accuracy": 0.8695652173913043,
518
- "eval_loss": 0.6455863118171692,
519
- "eval_runtime": 0.8212,
520
- "eval_samples_per_second": 56.015,
521
- "eval_steps_per_second": 7.306,
522
- "step": 540
523
- },
524
- {
525
- "epoch": 21.36,
526
- "learning_rate": 2.605263157894737e-05,
527
- "loss": 0.0812,
528
- "step": 550
529
  },
530
  {
531
- "epoch": 21.75,
532
- "learning_rate": 2.5473684210526318e-05,
533
- "loss": 0.0506,
534
- "step": 560
535
  },
536
  {
537
- "epoch": 21.98,
538
- "eval_accuracy": 0.782608695652174,
539
- "eval_loss": 0.9126870632171631,
540
- "eval_runtime": 0.8397,
541
- "eval_samples_per_second": 54.781,
542
- "eval_steps_per_second": 7.145,
543
- "step": 566
544
  },
545
  {
546
  "epoch": 22.14,
547
- "learning_rate": 2.4894736842105264e-05,
548
- "loss": 0.0413,
549
- "step": 570
550
- },
551
- {
552
- "epoch": 22.52,
553
- "learning_rate": 2.431578947368421e-05,
554
- "loss": 0.1031,
555
- "step": 580
556
  },
557
  {
558
- "epoch": 22.91,
559
- "learning_rate": 2.373684210526316e-05,
560
- "loss": 0.0963,
561
- "step": 590
562
  },
563
  {
564
- "epoch": 22.99,
565
- "eval_accuracy": 0.8260869565217391,
566
- "eval_loss": 0.6365217566490173,
567
- "eval_runtime": 0.8112,
568
- "eval_samples_per_second": 56.706,
569
- "eval_steps_per_second": 7.396,
570
- "step": 592
571
  },
572
  {
573
  "epoch": 23.3,
574
- "learning_rate": 2.3157894736842103e-05,
575
- "loss": 0.0693,
576
- "step": 600
577
  },
578
  {
579
- "epoch": 23.69,
580
- "learning_rate": 2.2578947368421053e-05,
581
- "loss": 0.0612,
582
- "step": 610
583
  },
584
  {
585
  "epoch": 24.0,
586
  "eval_accuracy": 0.8043478260869565,
587
- "eval_loss": 0.8252330422401428,
588
- "eval_runtime": 0.8217,
589
- "eval_samples_per_second": 55.982,
590
- "eval_steps_per_second": 7.302,
591
- "step": 618
592
- },
593
- {
594
- "epoch": 24.08,
595
- "learning_rate": 2.2000000000000003e-05,
596
- "loss": 0.0353,
597
- "step": 620
598
  },
599
  {
600
  "epoch": 24.47,
601
- "learning_rate": 2.142105263157895e-05,
602
- "loss": 0.1415,
603
- "step": 630
604
- },
605
- {
606
- "epoch": 24.85,
607
- "learning_rate": 2.0842105263157895e-05,
608
- "loss": 0.0875,
609
- "step": 640
610
  },
611
  {
612
- "epoch": 24.97,
613
- "eval_accuracy": 0.7391304347826086,
614
- "eval_loss": 0.8843982815742493,
615
- "eval_runtime": 0.7792,
616
- "eval_samples_per_second": 59.035,
617
- "eval_steps_per_second": 7.7,
618
- "step": 643
619
  },
620
  {
621
- "epoch": 25.24,
622
- "learning_rate": 2.0263157894736842e-05,
623
- "loss": 0.1033,
624
- "step": 650
625
  },
626
  {
627
  "epoch": 25.63,
628
- "learning_rate": 1.968421052631579e-05,
629
- "loss": 0.1041,
630
- "step": 660
631
  },
632
  {
633
  "epoch": 25.98,
634
- "eval_accuracy": 0.8260869565217391,
635
- "eval_loss": 0.6594071984291077,
636
- "eval_runtime": 0.7782,
637
- "eval_samples_per_second": 59.112,
638
- "eval_steps_per_second": 7.71,
639
- "step": 669
640
- },
641
- {
642
- "epoch": 26.02,
643
- "learning_rate": 1.9105263157894738e-05,
644
- "loss": 0.0429,
645
- "step": 670
646
  },
647
  {
648
- "epoch": 26.41,
649
- "learning_rate": 1.8526315789473684e-05,
650
- "loss": 0.1199,
651
- "step": 680
652
  },
653
  {
654
  "epoch": 26.8,
655
- "learning_rate": 1.7947368421052634e-05,
656
- "loss": 0.0512,
657
- "step": 690
658
- },
659
- {
660
- "epoch": 26.99,
661
- "eval_accuracy": 0.782608695652174,
662
- "eval_loss": 0.9883273839950562,
663
- "eval_runtime": 0.7907,
664
- "eval_samples_per_second": 58.177,
665
- "eval_steps_per_second": 7.588,
666
- "step": 695
667
  },
668
  {
669
- "epoch": 27.18,
670
- "learning_rate": 1.736842105263158e-05,
671
- "loss": 0.0506,
672
- "step": 700
 
 
 
673
  },
674
  {
675
- "epoch": 27.57,
676
- "learning_rate": 1.6789473684210526e-05,
677
- "loss": 0.0676,
678
- "step": 710
679
  },
680
  {
681
  "epoch": 27.96,
682
- "learning_rate": 1.6210526315789473e-05,
683
- "loss": 0.0675,
684
- "step": 720
685
  },
686
  {
687
- "epoch": 28.0,
688
  "eval_accuracy": 0.8043478260869565,
689
- "eval_loss": 0.9216018915176392,
690
- "eval_runtime": 0.8032,
691
- "eval_samples_per_second": 57.272,
692
- "eval_steps_per_second": 7.47,
693
- "step": 721
694
- },
695
- {
696
- "epoch": 28.35,
697
- "learning_rate": 1.5631578947368422e-05,
698
- "loss": 0.0958,
699
- "step": 730
700
  },
701
  {
702
- "epoch": 28.74,
703
- "learning_rate": 1.505263157894737e-05,
704
- "loss": 0.0492,
705
- "step": 740
706
  },
707
  {
708
- "epoch": 28.97,
709
- "eval_accuracy": 0.8043478260869565,
710
- "eval_loss": 0.9284108877182007,
711
- "eval_runtime": 0.7852,
712
- "eval_samples_per_second": 58.586,
713
- "eval_steps_per_second": 7.642,
714
- "step": 746
715
  },
716
  {
717
  "epoch": 29.13,
718
- "learning_rate": 1.4473684210526315e-05,
719
- "loss": 0.0574,
720
- "step": 750
721
- },
722
- {
723
- "epoch": 29.51,
724
- "learning_rate": 1.3894736842105265e-05,
725
- "loss": 0.0527,
726
- "step": 760
727
  },
728
  {
729
- "epoch": 29.9,
730
- "learning_rate": 1.3315789473684211e-05,
731
- "loss": 0.0679,
732
- "step": 770
733
  },
734
  {
735
- "epoch": 29.98,
736
- "eval_accuracy": 0.782608695652174,
737
- "eval_loss": 0.9341010451316833,
738
- "eval_runtime": 0.8282,
739
- "eval_samples_per_second": 55.542,
740
- "eval_steps_per_second": 7.245,
741
- "step": 772
742
  },
743
  {
744
  "epoch": 30.29,
745
- "learning_rate": 1.2736842105263159e-05,
746
- "loss": 0.0248,
747
- "step": 780
748
  },
749
  {
750
- "epoch": 30.68,
751
- "learning_rate": 1.2157894736842105e-05,
752
- "loss": 0.0996,
753
- "step": 790
754
  },
755
  {
756
  "epoch": 30.99,
757
- "eval_accuracy": 0.8043478260869565,
758
- "eval_loss": 0.9608001112937927,
759
- "eval_runtime": 0.8052,
760
- "eval_samples_per_second": 57.13,
761
- "eval_steps_per_second": 7.452,
762
- "step": 798
763
- },
764
- {
765
- "epoch": 31.07,
766
- "learning_rate": 1.1578947368421052e-05,
767
- "loss": 0.0596,
768
- "step": 800
769
  },
770
  {
771
  "epoch": 31.46,
772
- "learning_rate": 1.1000000000000001e-05,
773
- "loss": 0.047,
774
- "step": 810
775
- },
776
- {
777
- "epoch": 31.84,
778
- "learning_rate": 1.0421052631578948e-05,
779
- "loss": 0.0729,
780
- "step": 820
781
  },
782
  {
783
- "epoch": 32.0,
784
- "eval_accuracy": 0.782608695652174,
785
- "eval_loss": 1.015515685081482,
786
- "eval_runtime": 0.7852,
787
- "eval_samples_per_second": 58.586,
788
- "eval_steps_per_second": 7.642,
789
- "step": 824
790
  },
791
  {
792
- "epoch": 32.23,
793
- "learning_rate": 9.842105263157896e-06,
794
- "loss": 0.0446,
795
- "step": 830
796
  },
797
  {
798
  "epoch": 32.62,
799
- "learning_rate": 9.263157894736842e-06,
800
- "loss": 0.0296,
801
- "step": 840
802
  },
803
  {
804
  "epoch": 32.97,
805
  "eval_accuracy": 0.782608695652174,
806
- "eval_loss": 1.0314323902130127,
807
- "eval_runtime": 0.7802,
808
- "eval_samples_per_second": 58.96,
809
- "eval_steps_per_second": 7.69,
810
- "step": 849
811
- },
812
- {
813
- "epoch": 33.01,
814
- "learning_rate": 8.68421052631579e-06,
815
- "loss": 0.0918,
816
- "step": 850
817
  },
818
  {
819
- "epoch": 33.4,
820
- "learning_rate": 8.105263157894736e-06,
821
- "loss": 0.0642,
822
- "step": 860
823
  },
824
  {
825
  "epoch": 33.79,
826
- "learning_rate": 7.526315789473685e-06,
827
- "loss": 0.0414,
828
- "step": 870
829
  },
830
  {
831
- "epoch": 33.98,
832
  "eval_accuracy": 0.8043478260869565,
833
- "eval_loss": 0.8358449339866638,
834
- "eval_runtime": 0.7882,
835
- "eval_samples_per_second": 58.362,
836
- "eval_steps_per_second": 7.612,
837
- "step": 875
838
- },
839
- {
840
- "epoch": 34.17,
841
- "learning_rate": 6.947368421052632e-06,
842
- "loss": 0.0482,
843
- "step": 880
844
  },
845
  {
846
- "epoch": 34.56,
847
- "learning_rate": 6.3684210526315795e-06,
848
- "loss": 0.0462,
849
- "step": 890
850
  },
851
  {
852
  "epoch": 34.95,
853
- "learning_rate": 5.789473684210526e-06,
854
- "loss": 0.04,
855
- "step": 900
856
- },
857
- {
858
- "epoch": 34.99,
859
- "eval_accuracy": 0.8043478260869565,
860
- "eval_loss": 0.891183614730835,
861
- "eval_runtime": 0.8252,
862
- "eval_samples_per_second": 55.745,
863
- "eval_steps_per_second": 7.271,
864
- "step": 901
865
  },
866
  {
867
- "epoch": 35.34,
868
- "learning_rate": 5.210526315789474e-06,
869
- "loss": 0.0262,
870
- "step": 910
 
 
 
871
  },
872
  {
873
- "epoch": 35.73,
874
- "learning_rate": 4.631578947368421e-06,
875
- "loss": 0.0179,
876
- "step": 920
877
  },
878
  {
879
  "epoch": 36.0,
880
  "eval_accuracy": 0.8043478260869565,
881
- "eval_loss": 0.8544241786003113,
882
- "eval_runtime": 0.7722,
883
- "eval_samples_per_second": 59.571,
884
- "eval_steps_per_second": 7.77,
885
- "step": 927
886
  },
887
  {
888
  "epoch": 36.12,
889
- "learning_rate": 4.052631578947368e-06,
890
- "loss": 0.0205,
891
- "step": 930
892
- },
893
- {
894
- "epoch": 36.5,
895
- "learning_rate": 3.473684210526316e-06,
896
- "loss": 0.0586,
897
- "step": 940
898
  },
899
  {
900
- "epoch": 36.89,
901
- "learning_rate": 2.894736842105263e-06,
902
- "loss": 0.0665,
903
- "step": 950
904
  },
905
  {
906
- "epoch": 36.97,
907
  "eval_accuracy": 0.8043478260869565,
908
- "eval_loss": 0.9153819680213928,
909
- "eval_runtime": 0.7792,
910
- "eval_samples_per_second": 59.037,
911
- "eval_steps_per_second": 7.7,
912
- "step": 952
913
  },
914
  {
915
  "epoch": 37.28,
916
- "learning_rate": 2.3157894736842105e-06,
917
- "loss": 0.0597,
918
- "step": 960
919
  },
920
  {
921
- "epoch": 37.67,
922
- "learning_rate": 1.736842105263158e-06,
923
- "loss": 0.0413,
924
- "step": 970
925
  },
926
  {
927
  "epoch": 37.98,
928
  "eval_accuracy": 0.8043478260869565,
929
- "eval_loss": 0.8834414482116699,
930
- "eval_runtime": 0.8242,
931
- "eval_samples_per_second": 55.81,
932
- "eval_steps_per_second": 7.28,
933
- "step": 978
934
  },
935
  {
936
- "epoch": 38.06,
937
- "learning_rate": 1.1578947368421053e-06,
938
- "loss": 0.0218,
939
- "step": 980
940
  },
941
  {
942
- "epoch": 38.45,
943
- "learning_rate": 5.789473684210526e-07,
944
- "loss": 0.0512,
945
- "step": 990
 
 
 
 
 
 
 
 
 
946
  },
947
  {
948
- "epoch": 38.83,
949
  "learning_rate": 0.0,
950
- "loss": 0.04,
951
- "step": 1000
952
  },
953
  {
954
- "epoch": 38.83,
955
  "eval_accuracy": 0.8043478260869565,
956
- "eval_loss": 0.8807706236839294,
957
- "eval_runtime": 0.8612,
958
- "eval_samples_per_second": 53.414,
959
- "eval_steps_per_second": 6.967,
960
- "step": 1000
961
- },
962
- {
963
- "epoch": 38.83,
964
- "step": 1000,
965
- "total_flos": 2.4650643760327066e+18,
966
- "train_loss": 0.2159135382026434,
967
- "train_runtime": 637.3873,
968
- "train_samples_per_second": 51.397,
969
- "train_steps_per_second": 1.569
970
  }
971
  ],
972
  "logging_steps": 10,
973
- "max_steps": 1000,
974
  "num_input_tokens_seen": 0,
975
  "num_train_epochs": 40,
976
  "save_steps": 500,
977
- "total_flos": 2.4650643760327066e+18,
978
  "train_batch_size": 8,
979
  "trial_name": null,
980
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.8478260869565217,
3
+ "best_model_checkpoint": "vit-base-patch16-224-ve-U13b-80RX\\checkpoint-103",
4
+ "epoch": 39.61165048543689,
5
  "eval_steps": 500,
6
+ "global_step": 680,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.58,
13
+ "learning_rate": 1.6176470588235296e-05,
14
+ "loss": 1.3857,
15
  "step": 10
16
  },
17
  {
18
+ "epoch": 0.99,
19
+ "eval_accuracy": 0.5652173913043478,
20
+ "eval_loss": 1.370294451713562,
21
+ "eval_runtime": 0.8127,
22
+ "eval_samples_per_second": 56.602,
23
+ "eval_steps_per_second": 7.383,
24
+ "step": 17
 
 
 
 
 
 
25
  },
26
  {
27
  "epoch": 1.17,
28
+ "learning_rate": 3.235294117647059e-05,
29
+ "loss": 1.3738,
30
+ "step": 20
 
 
 
 
 
 
31
  },
32
  {
33
+ "epoch": 1.75,
34
+ "learning_rate": 4.8529411764705885e-05,
35
+ "loss": 1.3134,
36
+ "step": 30
37
  },
38
  {
39
  "epoch": 1.98,
40
+ "eval_accuracy": 0.45652173913043476,
41
+ "eval_loss": 1.2235466241836548,
42
+ "eval_runtime": 0.7992,
43
+ "eval_samples_per_second": 57.558,
44
+ "eval_steps_per_second": 7.508,
45
+ "step": 34
46
  },
47
  {
48
  "epoch": 2.33,
49
+ "learning_rate": 5.448916408668731e-05,
50
+ "loss": 1.1875,
51
+ "step": 40
 
 
 
 
 
 
52
  },
53
  {
54
+ "epoch": 2.91,
55
+ "learning_rate": 5.363777089783282e-05,
56
+ "loss": 1.0384,
57
+ "step": 50
 
 
 
58
  },
59
  {
60
+ "epoch": 2.97,
61
+ "eval_accuracy": 0.5434782608695652,
62
+ "eval_loss": 1.0173379182815552,
63
+ "eval_runtime": 0.8032,
64
+ "eval_samples_per_second": 57.271,
65
+ "eval_steps_per_second": 7.47,
66
+ "step": 51
67
  },
68
  {
69
  "epoch": 3.5,
70
+ "learning_rate": 5.278637770897833e-05,
71
+ "loss": 0.908,
72
+ "step": 60
 
 
 
 
 
 
73
  },
74
  {
75
+ "epoch": 3.96,
76
  "eval_accuracy": 0.782608695652174,
77
+ "eval_loss": 0.8346009254455566,
78
+ "eval_runtime": 0.7872,
79
+ "eval_samples_per_second": 58.435,
80
+ "eval_steps_per_second": 7.622,
81
+ "step": 68
82
  },
83
  {
84
+ "epoch": 4.08,
85
+ "learning_rate": 5.193498452012384e-05,
86
+ "loss": 0.8671,
87
+ "step": 70
88
  },
89
  {
90
  "epoch": 4.66,
91
+ "learning_rate": 5.108359133126935e-05,
92
+ "loss": 0.75,
93
+ "step": 80
94
  },
95
  {
96
+ "epoch": 4.95,
97
  "eval_accuracy": 0.782608695652174,
98
+ "eval_loss": 0.7342777252197266,
99
+ "eval_runtime": 0.8302,
100
+ "eval_samples_per_second": 55.408,
101
+ "eval_steps_per_second": 7.227,
102
+ "step": 85
 
 
 
 
 
 
103
  },
104
  {
105
+ "epoch": 5.24,
106
+ "learning_rate": 5.023219814241486e-05,
107
+ "loss": 0.6277,
108
+ "step": 90
109
  },
110
  {
111
  "epoch": 5.83,
112
+ "learning_rate": 4.9380804953560375e-05,
113
+ "loss": 0.5131,
114
+ "step": 100
 
 
 
 
 
 
 
 
 
115
  },
116
  {
117
+ "epoch": 6.0,
118
+ "eval_accuracy": 0.8478260869565217,
119
+ "eval_loss": 0.6099294424057007,
120
+ "eval_runtime": 0.8737,
121
+ "eval_samples_per_second": 52.649,
122
+ "eval_steps_per_second": 6.867,
123
+ "step": 103
124
  },
125
  {
126
+ "epoch": 6.41,
127
+ "learning_rate": 4.8529411764705885e-05,
128
+ "loss": 0.441,
129
+ "step": 110
130
  },
131
  {
132
  "epoch": 6.99,
133
+ "learning_rate": 4.7678018575851394e-05,
134
+ "loss": 0.395,
135
+ "step": 120
136
  },
137
  {
138
  "epoch": 6.99,
139
+ "eval_accuracy": 0.782608695652174,
140
+ "eval_loss": 0.5931932926177979,
141
+ "eval_runtime": 0.8642,
142
+ "eval_samples_per_second": 53.227,
143
+ "eval_steps_per_second": 6.943,
144
+ "step": 120
 
 
 
 
 
 
145
  },
146
  {
147
+ "epoch": 7.57,
148
+ "learning_rate": 4.6826625386996904e-05,
149
+ "loss": 0.355,
150
+ "step": 130
151
  },
152
  {
153
+ "epoch": 7.98,
154
+ "eval_accuracy": 0.7391304347826086,
155
+ "eval_loss": 0.7208946943283081,
156
+ "eval_runtime": 0.8322,
157
+ "eval_samples_per_second": 55.275,
158
+ "eval_steps_per_second": 7.21,
159
+ "step": 137
160
  },
161
  {
162
  "epoch": 8.16,
163
+ "learning_rate": 4.597523219814241e-05,
164
+ "loss": 0.339,
165
+ "step": 140
 
 
 
 
 
 
166
  },
167
  {
168
+ "epoch": 8.74,
169
+ "learning_rate": 4.512383900928793e-05,
170
+ "loss": 0.2658,
171
+ "step": 150
172
  },
173
  {
174
  "epoch": 8.97,
175
+ "eval_accuracy": 0.8043478260869565,
176
+ "eval_loss": 0.5652452707290649,
177
+ "eval_runtime": 0.7862,
178
+ "eval_samples_per_second": 58.511,
179
+ "eval_steps_per_second": 7.632,
180
+ "step": 154
181
  },
182
  {
183
  "epoch": 9.32,
184
+ "learning_rate": 4.427244582043344e-05,
185
+ "loss": 0.2305,
186
+ "step": 160
187
  },
188
  {
189
+ "epoch": 9.9,
190
  "learning_rate": 4.342105263157895e-05,
191
+ "loss": 0.248,
192
+ "step": 170
 
 
 
 
 
 
 
 
 
193
  },
194
  {
195
+ "epoch": 9.96,
196
+ "eval_accuracy": 0.782608695652174,
197
+ "eval_loss": 0.7103272080421448,
198
+ "eval_runtime": 0.8142,
199
+ "eval_samples_per_second": 56.498,
200
+ "eval_steps_per_second": 7.369,
201
+ "step": 171
202
  },
203
  {
204
  "epoch": 10.49,
205
+ "learning_rate": 4.2569659442724465e-05,
206
+ "loss": 0.2086,
207
+ "step": 180
 
 
 
 
 
 
208
  },
209
  {
210
+ "epoch": 10.95,
211
+ "eval_accuracy": 0.7608695652173914,
212
+ "eval_loss": 0.6788336038589478,
213
+ "eval_runtime": 0.8397,
214
+ "eval_samples_per_second": 54.782,
215
+ "eval_steps_per_second": 7.145,
216
+ "step": 188
217
  },
218
  {
219
+ "epoch": 11.07,
220
+ "learning_rate": 4.171826625386997e-05,
221
+ "loss": 0.2348,
222
+ "step": 190
223
  },
224
  {
225
  "epoch": 11.65,
226
+ "learning_rate": 4.0866873065015484e-05,
227
+ "loss": 0.1532,
228
+ "step": 200
229
  },
230
  {
231
  "epoch": 12.0,
232
  "eval_accuracy": 0.782608695652174,
233
+ "eval_loss": 0.5725099444389343,
234
+ "eval_runtime": 0.8672,
235
+ "eval_samples_per_second": 53.045,
236
+ "eval_steps_per_second": 6.919,
237
+ "step": 206
 
 
 
 
 
 
238
  },
239
  {
240
+ "epoch": 12.23,
241
+ "learning_rate": 4.001547987616099e-05,
242
+ "loss": 0.161,
243
+ "step": 210
244
  },
245
  {
246
  "epoch": 12.82,
247
+ "learning_rate": 3.91640866873065e-05,
248
+ "loss": 0.147,
249
+ "step": 220
 
 
 
 
 
 
 
 
 
250
  },
251
  {
252
+ "epoch": 12.99,
253
+ "eval_accuracy": 0.8043478260869565,
254
+ "eval_loss": 0.6129825115203857,
255
+ "eval_runtime": 0.7782,
256
+ "eval_samples_per_second": 59.112,
257
+ "eval_steps_per_second": 7.71,
258
+ "step": 223
259
  },
260
  {
261
+ "epoch": 13.4,
262
+ "learning_rate": 3.831269349845202e-05,
263
+ "loss": 0.1096,
264
+ "step": 230
265
  },
266
  {
267
  "epoch": 13.98,
268
+ "learning_rate": 3.746130030959752e-05,
269
+ "loss": 0.1145,
270
+ "step": 240
271
  },
272
  {
273
  "epoch": 13.98,
274
  "eval_accuracy": 0.8043478260869565,
275
+ "eval_loss": 0.656341552734375,
276
+ "eval_runtime": 0.7982,
277
+ "eval_samples_per_second": 57.63,
278
+ "eval_steps_per_second": 7.517,
279
+ "step": 240
 
 
 
 
 
 
280
  },
281
  {
282
+ "epoch": 14.56,
283
+ "learning_rate": 3.660990712074304e-05,
284
+ "loss": 0.1053,
285
+ "step": 250
286
  },
287
  {
288
+ "epoch": 14.97,
289
  "eval_accuracy": 0.8043478260869565,
290
+ "eval_loss": 0.5992771983146667,
291
+ "eval_runtime": 0.7932,
292
+ "eval_samples_per_second": 57.993,
293
+ "eval_steps_per_second": 7.564,
294
+ "step": 257
295
  },
296
  {
297
  "epoch": 15.15,
298
+ "learning_rate": 3.575851393188854e-05,
299
+ "loss": 0.1246,
300
+ "step": 260
 
 
 
 
 
 
301
  },
302
  {
303
+ "epoch": 15.73,
304
+ "learning_rate": 3.490712074303406e-05,
305
+ "loss": 0.0971,
306
+ "step": 270
307
  },
308
  {
309
+ "epoch": 15.96,
310
+ "eval_accuracy": 0.7391304347826086,
311
+ "eval_loss": 0.8839967250823975,
312
+ "eval_runtime": 0.8222,
313
+ "eval_samples_per_second": 55.947,
314
+ "eval_steps_per_second": 7.297,
315
+ "step": 274
316
  },
317
  {
318
  "epoch": 16.31,
319
+ "learning_rate": 3.405572755417957e-05,
320
+ "loss": 0.1263,
321
+ "step": 280
 
 
 
 
 
 
322
  },
323
  {
324
+ "epoch": 16.89,
325
+ "learning_rate": 3.3204334365325076e-05,
326
+ "loss": 0.0947,
327
+ "step": 290
 
 
 
328
  },
329
  {
330
+ "epoch": 16.95,
331
+ "eval_accuracy": 0.8043478260869565,
332
+ "eval_loss": 0.6255514025688171,
333
+ "eval_runtime": 0.8982,
334
+ "eval_samples_per_second": 51.213,
335
+ "eval_steps_per_second": 6.68,
336
+ "step": 291
337
  },
338
  {
339
  "epoch": 17.48,
340
+ "learning_rate": 3.235294117647059e-05,
341
+ "loss": 0.1055,
342
+ "step": 300
 
 
 
 
 
 
343
  },
344
  {
345
+ "epoch": 18.0,
346
+ "eval_accuracy": 0.7608695652173914,
347
+ "eval_loss": 0.8406044244766235,
348
+ "eval_runtime": 0.8512,
349
+ "eval_samples_per_second": 54.041,
350
+ "eval_steps_per_second": 7.049,
351
+ "step": 309
352
  },
353
  {
354
+ "epoch": 18.06,
355
+ "learning_rate": 3.1501547987616095e-05,
356
+ "loss": 0.1069,
357
+ "step": 310
358
  },
359
  {
360
  "epoch": 18.64,
361
+ "learning_rate": 3.065015479876161e-05,
362
+ "loss": 0.0974,
363
+ "step": 320
364
  },
365
  {
366
  "epoch": 18.99,
367
+ "eval_accuracy": 0.8478260869565217,
368
+ "eval_loss": 0.6354570984840393,
369
+ "eval_runtime": 0.9072,
370
+ "eval_samples_per_second": 50.705,
371
+ "eval_steps_per_second": 6.614,
372
+ "step": 326
 
 
 
 
 
 
373
  },
374
  {
375
+ "epoch": 19.22,
376
+ "learning_rate": 2.9798761609907124e-05,
377
+ "loss": 0.1043,
378
+ "step": 330
379
  },
380
  {
381
  "epoch": 19.81,
382
+ "learning_rate": 2.894736842105263e-05,
383
+ "loss": 0.1215,
384
+ "step": 340
 
 
 
 
 
 
 
 
 
385
  },
386
  {
387
+ "epoch": 19.98,
388
+ "eval_accuracy": 0.8043478260869565,
389
+ "eval_loss": 0.6650977730751038,
390
+ "eval_runtime": 0.7947,
391
+ "eval_samples_per_second": 57.885,
392
+ "eval_steps_per_second": 7.55,
393
+ "step": 343
394
  },
395
  {
396
+ "epoch": 20.39,
397
+ "learning_rate": 2.8095975232198143e-05,
398
+ "loss": 0.0847,
399
+ "step": 350
400
  },
401
  {
402
  "epoch": 20.97,
403
+ "learning_rate": 2.7244582043343656e-05,
404
+ "loss": 0.108,
405
+ "step": 360
406
  },
407
  {
408
  "epoch": 20.97,
409
+ "eval_accuracy": 0.782608695652174,
410
+ "eval_loss": 0.8301287889480591,
411
+ "eval_runtime": 0.8602,
412
+ "eval_samples_per_second": 53.476,
413
+ "eval_steps_per_second": 6.975,
414
+ "step": 360
 
 
 
 
 
 
415
  },
416
  {
417
+ "epoch": 21.55,
418
+ "learning_rate": 2.6393188854489165e-05,
419
+ "loss": 0.0784,
420
+ "step": 370
421
  },
422
  {
423
+ "epoch": 21.96,
424
+ "eval_accuracy": 0.7608695652173914,
425
+ "eval_loss": 0.8837165832519531,
426
+ "eval_runtime": 0.8957,
427
+ "eval_samples_per_second": 51.356,
428
+ "eval_steps_per_second": 6.699,
429
+ "step": 377
430
  },
431
  {
432
  "epoch": 22.14,
433
+ "learning_rate": 2.5541795665634675e-05,
434
+ "loss": 0.0725,
435
+ "step": 380
 
 
 
 
 
 
436
  },
437
  {
438
+ "epoch": 22.72,
439
+ "learning_rate": 2.4690402476780188e-05,
440
+ "loss": 0.0919,
441
+ "step": 390
442
  },
443
  {
444
+ "epoch": 22.95,
445
+ "eval_accuracy": 0.8043478260869565,
446
+ "eval_loss": 0.6985116004943848,
447
+ "eval_runtime": 0.8562,
448
+ "eval_samples_per_second": 53.726,
449
+ "eval_steps_per_second": 7.008,
450
+ "step": 394
451
  },
452
  {
453
  "epoch": 23.3,
454
+ "learning_rate": 2.3839009287925697e-05,
455
+ "loss": 0.0696,
456
+ "step": 400
457
  },
458
  {
459
+ "epoch": 23.88,
460
+ "learning_rate": 2.2987616099071207e-05,
461
+ "loss": 0.064,
462
+ "step": 410
463
  },
464
  {
465
  "epoch": 24.0,
466
  "eval_accuracy": 0.8043478260869565,
467
+ "eval_loss": 0.6426486968994141,
468
+ "eval_runtime": 0.9203,
469
+ "eval_samples_per_second": 49.983,
470
+ "eval_steps_per_second": 6.519,
471
+ "step": 412
 
 
 
 
 
 
472
  },
473
  {
474
  "epoch": 24.47,
475
+ "learning_rate": 2.213622291021672e-05,
476
+ "loss": 0.0669,
477
+ "step": 420
 
 
 
 
 
 
478
  },
479
  {
480
+ "epoch": 24.99,
481
+ "eval_accuracy": 0.782608695652174,
482
+ "eval_loss": 0.8101781010627747,
483
+ "eval_runtime": 0.8456,
484
+ "eval_samples_per_second": 54.397,
485
+ "eval_steps_per_second": 7.095,
486
+ "step": 429
487
  },
488
  {
489
+ "epoch": 25.05,
490
+ "learning_rate": 2.1284829721362232e-05,
491
+ "loss": 0.0994,
492
+ "step": 430
493
  },
494
  {
495
  "epoch": 25.63,
496
+ "learning_rate": 2.0433436532507742e-05,
497
+ "loss": 0.0878,
498
+ "step": 440
499
  },
500
  {
501
  "epoch": 25.98,
502
+ "eval_accuracy": 0.7391304347826086,
503
+ "eval_loss": 0.7863481640815735,
504
+ "eval_runtime": 0.8658,
505
+ "eval_samples_per_second": 53.128,
506
+ "eval_steps_per_second": 6.93,
507
+ "step": 446
 
 
 
 
 
 
508
  },
509
  {
510
+ "epoch": 26.21,
511
+ "learning_rate": 1.958204334365325e-05,
512
+ "loss": 0.0725,
513
+ "step": 450
514
  },
515
  {
516
  "epoch": 26.8,
517
+ "learning_rate": 1.873065015479876e-05,
518
+ "loss": 0.0875,
519
+ "step": 460
 
 
 
 
 
 
 
 
 
520
  },
521
  {
522
+ "epoch": 26.97,
523
+ "eval_accuracy": 0.7608695652173914,
524
+ "eval_loss": 0.8777241706848145,
525
+ "eval_runtime": 0.8092,
526
+ "eval_samples_per_second": 56.848,
527
+ "eval_steps_per_second": 7.415,
528
+ "step": 463
529
  },
530
  {
531
+ "epoch": 27.38,
532
+ "learning_rate": 1.787925696594427e-05,
533
+ "loss": 0.0641,
534
+ "step": 470
535
  },
536
  {
537
  "epoch": 27.96,
538
+ "learning_rate": 1.7027863777089787e-05,
539
+ "loss": 0.0441,
540
+ "step": 480
541
  },
542
  {
543
+ "epoch": 27.96,
544
  "eval_accuracy": 0.8043478260869565,
545
+ "eval_loss": 0.7324451208114624,
546
+ "eval_runtime": 0.8494,
547
+ "eval_samples_per_second": 54.159,
548
+ "eval_steps_per_second": 7.064,
549
+ "step": 480
 
 
 
 
 
 
550
  },
551
  {
552
+ "epoch": 28.54,
553
+ "learning_rate": 1.6176470588235296e-05,
554
+ "loss": 0.088,
555
+ "step": 490
556
  },
557
  {
558
+ "epoch": 28.95,
559
+ "eval_accuracy": 0.782608695652174,
560
+ "eval_loss": 0.8099210858345032,
561
+ "eval_runtime": 0.8292,
562
+ "eval_samples_per_second": 55.476,
563
+ "eval_steps_per_second": 7.236,
564
+ "step": 497
565
  },
566
  {
567
  "epoch": 29.13,
568
+ "learning_rate": 1.5325077399380806e-05,
569
+ "loss": 0.0691,
570
+ "step": 500
 
 
 
 
 
 
571
  },
572
  {
573
+ "epoch": 29.71,
574
+ "learning_rate": 1.4473684210526315e-05,
575
+ "loss": 0.0739,
576
+ "step": 510
577
  },
578
  {
579
+ "epoch": 30.0,
580
+ "eval_accuracy": 0.8043478260869565,
581
+ "eval_loss": 0.7775710821151733,
582
+ "eval_runtime": 0.8582,
583
+ "eval_samples_per_second": 53.601,
584
+ "eval_steps_per_second": 6.991,
585
+ "step": 515
586
  },
587
  {
588
  "epoch": 30.29,
589
+ "learning_rate": 1.3622291021671828e-05,
590
+ "loss": 0.0447,
591
+ "step": 520
592
  },
593
  {
594
+ "epoch": 30.87,
595
+ "learning_rate": 1.2770897832817337e-05,
596
+ "loss": 0.0598,
597
+ "step": 530
598
  },
599
  {
600
  "epoch": 30.99,
601
+ "eval_accuracy": 0.782608695652174,
602
+ "eval_loss": 0.8187718391418457,
603
+ "eval_runtime": 0.8752,
604
+ "eval_samples_per_second": 52.558,
605
+ "eval_steps_per_second": 6.855,
606
+ "step": 532
 
 
 
 
 
 
607
  },
608
  {
609
  "epoch": 31.46,
610
+ "learning_rate": 1.1919504643962849e-05,
611
+ "loss": 0.0443,
612
+ "step": 540
 
 
 
 
 
 
613
  },
614
  {
615
+ "epoch": 31.98,
616
+ "eval_accuracy": 0.8043478260869565,
617
+ "eval_loss": 0.8549073934555054,
618
+ "eval_runtime": 0.9272,
619
+ "eval_samples_per_second": 49.611,
620
+ "eval_steps_per_second": 6.471,
621
+ "step": 549
622
  },
623
  {
624
+ "epoch": 32.04,
625
+ "learning_rate": 1.106811145510836e-05,
626
+ "loss": 0.0577,
627
+ "step": 550
628
  },
629
  {
630
  "epoch": 32.62,
631
+ "learning_rate": 1.0216718266253871e-05,
632
+ "loss": 0.0376,
633
+ "step": 560
634
  },
635
  {
636
  "epoch": 32.97,
637
  "eval_accuracy": 0.782608695652174,
638
+ "eval_loss": 0.8048975467681885,
639
+ "eval_runtime": 0.7812,
640
+ "eval_samples_per_second": 58.886,
641
+ "eval_steps_per_second": 7.681,
642
+ "step": 566
 
 
 
 
 
 
643
  },
644
  {
645
+ "epoch": 33.2,
646
+ "learning_rate": 9.36532507739938e-06,
647
+ "loss": 0.0406,
648
+ "step": 570
649
  },
650
  {
651
  "epoch": 33.79,
652
+ "learning_rate": 8.513931888544893e-06,
653
+ "loss": 0.0375,
654
+ "step": 580
655
  },
656
  {
657
+ "epoch": 33.96,
658
  "eval_accuracy": 0.8043478260869565,
659
+ "eval_loss": 0.8037001490592957,
660
+ "eval_runtime": 0.8452,
661
+ "eval_samples_per_second": 54.424,
662
+ "eval_steps_per_second": 7.099,
663
+ "step": 583
 
 
 
 
 
 
664
  },
665
  {
666
+ "epoch": 34.37,
667
+ "learning_rate": 7.662538699690403e-06,
668
+ "loss": 0.0485,
669
+ "step": 590
670
  },
671
  {
672
  "epoch": 34.95,
673
+ "learning_rate": 6.811145510835914e-06,
674
+ "loss": 0.0346,
675
+ "step": 600
 
 
 
 
 
 
 
 
 
676
  },
677
  {
678
+ "epoch": 34.95,
679
+ "eval_accuracy": 0.8260869565217391,
680
+ "eval_loss": 0.8255174160003662,
681
+ "eval_runtime": 0.8432,
682
+ "eval_samples_per_second": 54.555,
683
+ "eval_steps_per_second": 7.116,
684
+ "step": 600
685
  },
686
  {
687
+ "epoch": 35.53,
688
+ "learning_rate": 5.959752321981424e-06,
689
+ "loss": 0.0471,
690
+ "step": 610
691
  },
692
  {
693
  "epoch": 36.0,
694
  "eval_accuracy": 0.8043478260869565,
695
+ "eval_loss": 0.823909342288971,
696
+ "eval_runtime": 0.8722,
697
+ "eval_samples_per_second": 52.74,
698
+ "eval_steps_per_second": 6.879,
699
+ "step": 618
700
  },
701
  {
702
  "epoch": 36.12,
703
+ "learning_rate": 5.1083591331269355e-06,
704
+ "loss": 0.0285,
705
+ "step": 620
 
 
 
 
 
 
706
  },
707
  {
708
+ "epoch": 36.7,
709
+ "learning_rate": 4.256965944272447e-06,
710
+ "loss": 0.0669,
711
+ "step": 630
712
  },
713
  {
714
+ "epoch": 36.99,
715
  "eval_accuracy": 0.8043478260869565,
716
+ "eval_loss": 0.8187545537948608,
717
+ "eval_runtime": 0.8572,
718
+ "eval_samples_per_second": 53.663,
719
+ "eval_steps_per_second": 6.999,
720
+ "step": 635
721
  },
722
  {
723
  "epoch": 37.28,
724
+ "learning_rate": 3.405572755417957e-06,
725
+ "loss": 0.0386,
726
+ "step": 640
727
  },
728
  {
729
+ "epoch": 37.86,
730
+ "learning_rate": 2.5541795665634677e-06,
731
+ "loss": 0.0438,
732
+ "step": 650
733
  },
734
  {
735
  "epoch": 37.98,
736
  "eval_accuracy": 0.8043478260869565,
737
+ "eval_loss": 0.8443450927734375,
738
+ "eval_runtime": 0.8317,
739
+ "eval_samples_per_second": 55.309,
740
+ "eval_steps_per_second": 7.214,
741
+ "step": 652
742
  },
743
  {
744
+ "epoch": 38.45,
745
+ "learning_rate": 1.7027863777089785e-06,
746
+ "loss": 0.0549,
747
+ "step": 660
748
  },
749
  {
750
+ "epoch": 38.97,
751
+ "eval_accuracy": 0.8043478260869565,
752
+ "eval_loss": 0.8550635576248169,
753
+ "eval_runtime": 0.8407,
754
+ "eval_samples_per_second": 54.716,
755
+ "eval_steps_per_second": 7.137,
756
+ "step": 669
757
+ },
758
+ {
759
+ "epoch": 39.03,
760
+ "learning_rate": 8.513931888544892e-07,
761
+ "loss": 0.0368,
762
+ "step": 670
763
  },
764
  {
765
+ "epoch": 39.61,
766
  "learning_rate": 0.0,
767
+ "loss": 0.0622,
768
+ "step": 680
769
  },
770
  {
771
+ "epoch": 39.61,
772
  "eval_accuracy": 0.8043478260869565,
773
+ "eval_loss": 0.8551087379455566,
774
+ "eval_runtime": 0.7647,
775
+ "eval_samples_per_second": 60.156,
776
+ "eval_steps_per_second": 7.846,
777
+ "step": 680
778
+ },
779
+ {
780
+ "epoch": 39.61,
781
+ "step": 680,
782
+ "total_flos": 2.5142726714989363e+18,
783
+ "train_loss": 0.24349691933568787,
784
+ "train_runtime": 652.5132,
785
+ "train_samples_per_second": 50.206,
786
+ "train_steps_per_second": 1.042
787
  }
788
  ],
789
  "logging_steps": 10,
790
+ "max_steps": 680,
791
  "num_input_tokens_seen": 0,
792
  "num_train_epochs": 40,
793
  "save_steps": 500,
794
+ "total_flos": 2.5142726714989363e+18,
795
  "train_batch_size": 8,
796
  "trial_name": null,
797
  "trial_params": null