navradio commited on
Commit
33c9ae5
1 Parent(s): 28f0621

Training in progress, epoch 0

Browse files
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 29.77,
3
- "total_flos": 1.2944501944054088e+18,
4
- "train_loss": 0.5755378539870385,
5
- "train_runtime": 78104.0618,
6
- "train_samples_per_second": 38.759,
7
- "train_steps_per_second": 0.038
8
  }
 
1
  {
2
+ "epoch": 29.57,
3
+ "total_flos": 1.2835906128174883e+19,
4
+ "train_loss": 0.46080853518317727,
5
+ "train_runtime": 3932.5078,
6
+ "train_samples_per_second": 133.22,
7
+ "train_steps_per_second": 0.13
8
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6dc531b0365295df461136c603980a83e788e1115b1b222654ecc5c5f32fba2d
3
  size 110394865
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95d775242790b26922a803a30e3e6e6002ff27ae00b7347e1cdabf391f374a77
3
  size 110394865
train_results.json CHANGED
@@ -1,7 +1,8 @@
1
  {
2
- "epoch": 29.77,
3
- "train_loss": 0.5755378539870385,
4
- "train_runtime": 78104.0618,
5
- "train_samples_per_second": 38.759,
6
- "train_steps_per_second": 0.038
 
7
  }
 
1
  {
2
+ "epoch": 29.57,
3
+ "total_flos": 1.2835906128174883e+19,
4
+ "train_loss": 0.46080853518317727,
5
+ "train_runtime": 3932.5078,
6
+ "train_samples_per_second": 133.22,
7
+ "train_steps_per_second": 0.13
8
  }
trainer_state.json CHANGED
@@ -1,2059 +1,601 @@
1
  {
2
- "best_metric": 0.7344809133071709,
3
- "best_model_checkpoint": "swin-tiny-patch4-window7-224-finetuned-eurosat/checkpoint-1678",
4
- "epoch": 29.772151898734176,
5
- "global_step": 2940,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.1,
12
- "learning_rate": 1.7006802721088438e-06,
13
- "loss": 0.7145,
14
  "step": 10
15
  },
16
  {
17
- "epoch": 0.2,
18
- "learning_rate": 3.4013605442176877e-06,
19
- "loss": 0.6942,
20
- "step": 20
21
- },
22
- {
23
- "epoch": 0.3,
24
- "learning_rate": 5.102040816326531e-06,
25
- "loss": 0.6747,
26
- "step": 30
27
- },
28
- {
29
- "epoch": 0.41,
30
- "learning_rate": 6.802721088435375e-06,
31
- "loss": 0.6626,
32
- "step": 40
33
- },
34
- {
35
- "epoch": 0.51,
36
- "learning_rate": 8.503401360544217e-06,
37
- "loss": 0.645,
38
- "step": 50
39
- },
40
- {
41
- "epoch": 0.61,
42
- "learning_rate": 1.0204081632653061e-05,
43
- "loss": 0.643,
44
- "step": 60
45
- },
46
- {
47
- "epoch": 0.71,
48
- "learning_rate": 1.1904761904761905e-05,
49
- "loss": 0.6353,
50
- "step": 70
51
- },
52
- {
53
- "epoch": 0.81,
54
- "learning_rate": 1.360544217687075e-05,
55
- "loss": 0.6242,
56
- "step": 80
57
- },
58
- {
59
- "epoch": 0.91,
60
- "learning_rate": 1.5306122448979594e-05,
61
- "loss": 0.6271,
62
- "step": 90
63
- },
64
- {
65
- "epoch": 0.99,
66
- "eval_accuracy": 0.6925615412058509,
67
- "eval_loss": 0.6034993529319763,
68
- "eval_runtime": 291.7633,
69
- "eval_samples_per_second": 38.428,
70
- "eval_steps_per_second": 0.151,
71
- "step": 98
72
- },
73
- {
74
- "epoch": 1.01,
75
- "learning_rate": 1.7006802721088435e-05,
76
- "loss": 0.625,
77
- "step": 100
78
- },
79
- {
80
- "epoch": 1.11,
81
- "learning_rate": 1.8707482993197282e-05,
82
- "loss": 0.6206,
83
- "step": 110
84
- },
85
- {
86
- "epoch": 1.22,
87
- "learning_rate": 2.0408163265306123e-05,
88
- "loss": 0.6208,
89
- "step": 120
90
- },
91
- {
92
- "epoch": 1.32,
93
- "learning_rate": 2.2108843537414966e-05,
94
- "loss": 0.625,
95
- "step": 130
96
- },
97
- {
98
- "epoch": 1.42,
99
- "learning_rate": 2.380952380952381e-05,
100
- "loss": 0.6198,
101
- "step": 140
102
- },
103
- {
104
- "epoch": 1.52,
105
- "learning_rate": 2.5510204081632654e-05,
106
- "loss": 0.613,
107
- "step": 150
108
- },
109
- {
110
- "epoch": 1.62,
111
- "learning_rate": 2.72108843537415e-05,
112
- "loss": 0.6242,
113
- "step": 160
114
- },
115
- {
116
- "epoch": 1.72,
117
- "learning_rate": 2.891156462585034e-05,
118
- "loss": 0.616,
119
- "step": 170
120
- },
121
- {
122
- "epoch": 1.82,
123
- "learning_rate": 3.061224489795919e-05,
124
- "loss": 0.6145,
125
- "step": 180
126
- },
127
- {
128
- "epoch": 1.92,
129
- "learning_rate": 3.231292517006803e-05,
130
- "loss": 0.6156,
131
- "step": 190
132
- },
133
- {
134
- "epoch": 1.99,
135
- "eval_accuracy": 0.7005886550124866,
136
- "eval_loss": 0.5843892097473145,
137
- "eval_runtime": 301.4707,
138
- "eval_samples_per_second": 37.191,
139
- "eval_steps_per_second": 0.146,
140
- "step": 197
141
- },
142
- {
143
- "epoch": 2.03,
144
- "learning_rate": 3.401360544217687e-05,
145
- "loss": 0.6069,
146
- "step": 200
147
- },
148
- {
149
- "epoch": 2.13,
150
- "learning_rate": 3.571428571428572e-05,
151
- "loss": 0.6087,
152
- "step": 210
153
- },
154
- {
155
- "epoch": 2.23,
156
- "learning_rate": 3.7414965986394564e-05,
157
- "loss": 0.6116,
158
- "step": 220
159
- },
160
- {
161
- "epoch": 2.33,
162
- "learning_rate": 3.9115646258503405e-05,
163
- "loss": 0.6085,
164
- "step": 230
165
- },
166
- {
167
- "epoch": 2.43,
168
- "learning_rate": 4.0816326530612245e-05,
169
- "loss": 0.6049,
170
- "step": 240
171
- },
172
- {
173
- "epoch": 2.53,
174
- "learning_rate": 4.2517006802721085e-05,
175
- "loss": 0.6013,
176
- "step": 250
177
- },
178
- {
179
- "epoch": 2.63,
180
- "learning_rate": 4.421768707482993e-05,
181
- "loss": 0.6009,
182
- "step": 260
183
- },
184
- {
185
- "epoch": 2.73,
186
- "learning_rate": 4.591836734693878e-05,
187
- "loss": 0.608,
188
- "step": 270
189
- },
190
- {
191
- "epoch": 2.84,
192
- "learning_rate": 4.761904761904762e-05,
193
- "loss": 0.6145,
194
- "step": 280
195
- },
196
- {
197
- "epoch": 2.94,
198
- "learning_rate": 4.931972789115647e-05,
199
- "loss": 0.6148,
200
- "step": 290
201
- },
202
- {
203
- "epoch": 3.0,
204
- "eval_accuracy": 0.7103995718872637,
205
- "eval_loss": 0.5758489966392517,
206
- "eval_runtime": 290.2528,
207
- "eval_samples_per_second": 38.628,
208
- "eval_steps_per_second": 0.152,
209
- "step": 296
210
- },
211
- {
212
- "epoch": 3.04,
213
- "learning_rate": 4.9886621315192745e-05,
214
- "loss": 0.5965,
215
- "step": 300
216
- },
217
- {
218
- "epoch": 3.14,
219
- "learning_rate": 4.969765684051398e-05,
220
- "loss": 0.601,
221
- "step": 310
222
- },
223
- {
224
- "epoch": 3.24,
225
- "learning_rate": 4.9508692365835225e-05,
226
- "loss": 0.6087,
227
- "step": 320
228
- },
229
- {
230
- "epoch": 3.34,
231
- "learning_rate": 4.931972789115647e-05,
232
- "loss": 0.6063,
233
- "step": 330
234
- },
235
- {
236
- "epoch": 3.44,
237
- "learning_rate": 4.9130763416477704e-05,
238
- "loss": 0.6034,
239
- "step": 340
240
- },
241
- {
242
- "epoch": 3.54,
243
- "learning_rate": 4.894179894179895e-05,
244
- "loss": 0.5962,
245
- "step": 350
246
- },
247
- {
248
- "epoch": 3.65,
249
- "learning_rate": 4.875283446712018e-05,
250
- "loss": 0.605,
251
- "step": 360
252
- },
253
- {
254
- "epoch": 3.75,
255
- "learning_rate": 4.8563869992441426e-05,
256
- "loss": 0.5962,
257
- "step": 370
258
- },
259
- {
260
- "epoch": 3.85,
261
- "learning_rate": 4.837490551776266e-05,
262
- "loss": 0.5986,
263
- "step": 380
264
- },
265
- {
266
- "epoch": 3.95,
267
- "learning_rate": 4.8185941043083905e-05,
268
- "loss": 0.6055,
269
- "step": 390
270
- },
271
- {
272
- "epoch": 4.0,
273
- "eval_accuracy": 0.7014805565465573,
274
- "eval_loss": 0.5852587819099426,
275
- "eval_runtime": 287.2656,
276
- "eval_samples_per_second": 39.03,
277
- "eval_steps_per_second": 0.153,
278
- "step": 395
279
- },
280
- {
281
- "epoch": 4.05,
282
- "learning_rate": 4.799697656840514e-05,
283
- "loss": 0.597,
284
- "step": 400
285
- },
286
- {
287
- "epoch": 4.15,
288
- "learning_rate": 4.7808012093726384e-05,
289
- "loss": 0.5931,
290
- "step": 410
291
- },
292
- {
293
- "epoch": 4.25,
294
- "learning_rate": 4.761904761904762e-05,
295
- "loss": 0.5942,
296
- "step": 420
297
- },
298
- {
299
- "epoch": 4.35,
300
- "learning_rate": 4.7430083144368857e-05,
301
- "loss": 0.5989,
302
- "step": 430
303
- },
304
- {
305
- "epoch": 4.46,
306
- "learning_rate": 4.72411186696901e-05,
307
- "loss": 0.5947,
308
- "step": 440
309
- },
310
- {
311
- "epoch": 4.56,
312
- "learning_rate": 4.7052154195011336e-05,
313
- "loss": 0.5972,
314
- "step": 450
315
- },
316
- {
317
- "epoch": 4.66,
318
- "learning_rate": 4.686318972033258e-05,
319
- "loss": 0.5864,
320
- "step": 460
321
- },
322
- {
323
- "epoch": 4.76,
324
- "learning_rate": 4.667422524565382e-05,
325
- "loss": 0.5975,
326
- "step": 470
327
- },
328
- {
329
- "epoch": 4.86,
330
- "learning_rate": 4.648526077097506e-05,
331
- "loss": 0.598,
332
- "step": 480
333
- },
334
- {
335
- "epoch": 4.96,
336
- "learning_rate": 4.62962962962963e-05,
337
- "loss": 0.5938,
338
- "step": 490
339
- },
340
- {
341
- "epoch": 4.99,
342
- "eval_accuracy": 0.7103995718872637,
343
- "eval_loss": 0.5857925415039062,
344
- "eval_runtime": 270.8215,
345
- "eval_samples_per_second": 41.4,
346
- "eval_steps_per_second": 0.162,
347
- "step": 493
348
- },
349
- {
350
- "epoch": 5.06,
351
- "learning_rate": 4.610733182161754e-05,
352
- "loss": 0.5987,
353
- "step": 500
354
- },
355
- {
356
- "epoch": 5.16,
357
- "learning_rate": 4.591836734693878e-05,
358
- "loss": 0.5889,
359
- "step": 510
360
- },
361
- {
362
- "epoch": 5.27,
363
- "learning_rate": 4.5729402872260016e-05,
364
- "loss": 0.5908,
365
- "step": 520
366
- },
367
- {
368
- "epoch": 5.37,
369
- "learning_rate": 4.554043839758126e-05,
370
- "loss": 0.5908,
371
- "step": 530
372
- },
373
- {
374
- "epoch": 5.47,
375
- "learning_rate": 4.53514739229025e-05,
376
- "loss": 0.5871,
377
- "step": 540
378
- },
379
- {
380
- "epoch": 5.57,
381
- "learning_rate": 4.516250944822374e-05,
382
- "loss": 0.593,
383
- "step": 550
384
- },
385
- {
386
- "epoch": 5.67,
387
- "learning_rate": 4.4973544973544974e-05,
388
- "loss": 0.5891,
389
- "step": 560
390
- },
391
- {
392
- "epoch": 5.77,
393
- "learning_rate": 4.478458049886621e-05,
394
- "loss": 0.594,
395
- "step": 570
396
- },
397
- {
398
- "epoch": 5.87,
399
- "learning_rate": 4.4595616024187454e-05,
400
- "loss": 0.5924,
401
- "step": 580
402
- },
403
- {
404
- "epoch": 5.97,
405
- "learning_rate": 4.4406651549508697e-05,
406
- "loss": 0.5878,
407
- "step": 590
408
- },
409
- {
410
- "epoch": 5.99,
411
- "eval_accuracy": 0.7210132001427042,
412
- "eval_loss": 0.5630102753639221,
413
- "eval_runtime": 271.5122,
414
- "eval_samples_per_second": 41.295,
415
- "eval_steps_per_second": 0.162,
416
- "step": 592
417
- },
418
- {
419
- "epoch": 6.08,
420
- "learning_rate": 4.421768707482993e-05,
421
- "loss": 0.5898,
422
- "step": 600
423
- },
424
- {
425
- "epoch": 6.18,
426
- "learning_rate": 4.4028722600151176e-05,
427
- "loss": 0.5872,
428
- "step": 610
429
- },
430
- {
431
- "epoch": 6.28,
432
- "learning_rate": 4.383975812547241e-05,
433
- "loss": 0.5885,
434
- "step": 620
435
- },
436
- {
437
- "epoch": 6.38,
438
- "learning_rate": 4.3650793650793655e-05,
439
- "loss": 0.5811,
440
- "step": 630
441
- },
442
- {
443
- "epoch": 6.48,
444
- "learning_rate": 4.346182917611489e-05,
445
- "loss": 0.5859,
446
- "step": 640
447
- },
448
- {
449
- "epoch": 6.58,
450
- "learning_rate": 4.3272864701436134e-05,
451
- "loss": 0.5914,
452
- "step": 650
453
- },
454
- {
455
- "epoch": 6.68,
456
- "learning_rate": 4.308390022675737e-05,
457
- "loss": 0.5867,
458
- "step": 660
459
- },
460
- {
461
- "epoch": 6.78,
462
- "learning_rate": 4.289493575207861e-05,
463
- "loss": 0.5865,
464
- "step": 670
465
- },
466
- {
467
- "epoch": 6.89,
468
- "learning_rate": 4.2705971277399856e-05,
469
- "loss": 0.5881,
470
- "step": 680
471
- },
472
- {
473
- "epoch": 6.99,
474
- "learning_rate": 4.2517006802721085e-05,
475
- "loss": 0.5873,
476
- "step": 690
477
- },
478
- {
479
- "epoch": 7.0,
480
- "eval_accuracy": 0.7235997145915091,
481
- "eval_loss": 0.5619792938232422,
482
- "eval_runtime": 267.8238,
483
- "eval_samples_per_second": 41.863,
484
- "eval_steps_per_second": 0.164,
485
- "step": 691
486
- },
487
- {
488
- "epoch": 7.09,
489
- "learning_rate": 4.232804232804233e-05,
490
- "loss": 0.5803,
491
- "step": 700
492
- },
493
- {
494
- "epoch": 7.19,
495
- "learning_rate": 4.2139077853363565e-05,
496
- "loss": 0.5775,
497
- "step": 710
498
- },
499
- {
500
- "epoch": 7.29,
501
- "learning_rate": 4.195011337868481e-05,
502
- "loss": 0.5849,
503
- "step": 720
504
- },
505
- {
506
- "epoch": 7.39,
507
- "learning_rate": 4.176114890400605e-05,
508
- "loss": 0.5856,
509
- "step": 730
510
- },
511
- {
512
- "epoch": 7.49,
513
- "learning_rate": 4.157218442932729e-05,
514
- "loss": 0.5843,
515
- "step": 740
516
- },
517
- {
518
- "epoch": 7.59,
519
- "learning_rate": 4.138321995464853e-05,
520
- "loss": 0.5889,
521
- "step": 750
522
- },
523
- {
524
- "epoch": 7.7,
525
- "learning_rate": 4.1194255479969766e-05,
526
- "loss": 0.5763,
527
- "step": 760
528
- },
529
- {
530
- "epoch": 7.8,
531
- "learning_rate": 4.100529100529101e-05,
532
- "loss": 0.5833,
533
- "step": 770
534
- },
535
- {
536
- "epoch": 7.9,
537
- "learning_rate": 4.0816326530612245e-05,
538
- "loss": 0.5781,
539
- "step": 780
540
- },
541
- {
542
- "epoch": 8.0,
543
- "learning_rate": 4.062736205593349e-05,
544
- "loss": 0.5947,
545
- "step": 790
546
- },
547
- {
548
- "epoch": 8.0,
549
- "eval_accuracy": 0.7195861576881912,
550
- "eval_loss": 0.5669898390769958,
551
- "eval_runtime": 271.8958,
552
- "eval_samples_per_second": 41.236,
553
- "eval_steps_per_second": 0.162,
554
- "step": 790
555
- },
556
- {
557
- "epoch": 8.1,
558
- "learning_rate": 4.043839758125473e-05,
559
- "loss": 0.5784,
560
- "step": 800
561
- },
562
- {
563
- "epoch": 8.2,
564
- "learning_rate": 4.024943310657597e-05,
565
- "loss": 0.578,
566
- "step": 810
567
- },
568
- {
569
- "epoch": 8.3,
570
- "learning_rate": 4.006046863189721e-05,
571
- "loss": 0.5888,
572
- "step": 820
573
- },
574
- {
575
- "epoch": 8.41,
576
- "learning_rate": 3.987150415721844e-05,
577
- "loss": 0.5828,
578
- "step": 830
579
- },
580
- {
581
- "epoch": 8.51,
582
- "learning_rate": 3.968253968253968e-05,
583
- "loss": 0.582,
584
- "step": 840
585
- },
586
- {
587
- "epoch": 8.61,
588
- "learning_rate": 3.9493575207860925e-05,
589
- "loss": 0.5817,
590
- "step": 850
591
- },
592
- {
593
- "epoch": 8.71,
594
- "learning_rate": 3.930461073318216e-05,
595
- "loss": 0.5729,
596
- "step": 860
597
- },
598
- {
599
- "epoch": 8.81,
600
- "learning_rate": 3.9115646258503405e-05,
601
- "loss": 0.5831,
602
- "step": 870
603
- },
604
- {
605
- "epoch": 8.91,
606
- "learning_rate": 3.892668178382464e-05,
607
- "loss": 0.5866,
608
- "step": 880
609
- },
610
- {
611
- "epoch": 8.99,
612
- "eval_accuracy": 0.7265429896539422,
613
- "eval_loss": 0.5592203736305237,
614
- "eval_runtime": 270.4426,
615
- "eval_samples_per_second": 41.458,
616
- "eval_steps_per_second": 0.163,
617
- "step": 888
618
- },
619
- {
620
- "epoch": 9.01,
621
- "learning_rate": 3.8737717309145884e-05,
622
- "loss": 0.5808,
623
- "step": 890
624
- },
625
- {
626
- "epoch": 9.11,
627
- "learning_rate": 3.854875283446712e-05,
628
- "loss": 0.5863,
629
- "step": 900
630
- },
631
- {
632
- "epoch": 9.22,
633
- "learning_rate": 3.835978835978836e-05,
634
- "loss": 0.5783,
635
- "step": 910
636
- },
637
- {
638
- "epoch": 9.32,
639
- "learning_rate": 3.81708238851096e-05,
640
- "loss": 0.578,
641
- "step": 920
642
- },
643
- {
644
- "epoch": 9.42,
645
- "learning_rate": 3.798185941043084e-05,
646
- "loss": 0.569,
647
- "step": 930
648
- },
649
- {
650
- "epoch": 9.52,
651
- "learning_rate": 3.7792894935752085e-05,
652
- "loss": 0.5821,
653
- "step": 940
654
- },
655
- {
656
- "epoch": 9.62,
657
- "learning_rate": 3.760393046107332e-05,
658
- "loss": 0.5827,
659
- "step": 950
660
- },
661
- {
662
- "epoch": 9.72,
663
- "learning_rate": 3.7414965986394564e-05,
664
- "loss": 0.5817,
665
- "step": 960
666
- },
667
- {
668
- "epoch": 9.82,
669
- "learning_rate": 3.7226001511715794e-05,
670
- "loss": 0.5774,
671
- "step": 970
672
- },
673
- {
674
- "epoch": 9.92,
675
- "learning_rate": 3.7037037037037037e-05,
676
- "loss": 0.5807,
677
- "step": 980
678
- },
679
- {
680
- "epoch": 9.99,
681
- "eval_accuracy": 0.7253835176596504,
682
- "eval_loss": 0.557357132434845,
683
- "eval_runtime": 270.5413,
684
- "eval_samples_per_second": 41.443,
685
- "eval_steps_per_second": 0.163,
686
- "step": 987
687
- },
688
- {
689
- "epoch": 10.03,
690
- "learning_rate": 3.684807256235828e-05,
691
- "loss": 0.5883,
692
- "step": 990
693
- },
694
- {
695
- "epoch": 10.13,
696
- "learning_rate": 3.6659108087679516e-05,
697
- "loss": 0.5711,
698
- "step": 1000
699
- },
700
- {
701
- "epoch": 10.23,
702
- "learning_rate": 3.647014361300076e-05,
703
- "loss": 0.5794,
704
- "step": 1010
705
- },
706
- {
707
- "epoch": 10.33,
708
- "learning_rate": 3.6281179138321995e-05,
709
- "loss": 0.5869,
710
- "step": 1020
711
- },
712
- {
713
- "epoch": 10.43,
714
- "learning_rate": 3.609221466364324e-05,
715
- "loss": 0.584,
716
- "step": 1030
717
- },
718
- {
719
- "epoch": 10.53,
720
- "learning_rate": 3.5903250188964474e-05,
721
- "loss": 0.5716,
722
- "step": 1040
723
- },
724
- {
725
- "epoch": 10.63,
726
- "learning_rate": 3.571428571428572e-05,
727
- "loss": 0.5783,
728
- "step": 1050
729
- },
730
- {
731
- "epoch": 10.73,
732
- "learning_rate": 3.552532123960696e-05,
733
- "loss": 0.577,
734
- "step": 1060
735
- },
736
- {
737
- "epoch": 10.84,
738
- "learning_rate": 3.5336356764928196e-05,
739
- "loss": 0.5779,
740
- "step": 1070
741
- },
742
- {
743
- "epoch": 10.94,
744
- "learning_rate": 3.514739229024944e-05,
745
- "loss": 0.5764,
746
- "step": 1080
747
- },
748
- {
749
- "epoch": 11.0,
750
- "eval_accuracy": 0.7244916161255798,
751
- "eval_loss": 0.565514862537384,
752
- "eval_runtime": 273.5069,
753
- "eval_samples_per_second": 40.993,
754
- "eval_steps_per_second": 0.161,
755
- "step": 1086
756
- },
757
- {
758
- "epoch": 11.04,
759
- "learning_rate": 3.4958427815570675e-05,
760
- "loss": 0.5806,
761
- "step": 1090
762
- },
763
- {
764
- "epoch": 11.14,
765
- "learning_rate": 3.476946334089191e-05,
766
- "loss": 0.5779,
767
- "step": 1100
768
- },
769
- {
770
- "epoch": 11.24,
771
- "learning_rate": 3.4580498866213154e-05,
772
- "loss": 0.5799,
773
- "step": 1110
774
- },
775
- {
776
- "epoch": 11.34,
777
- "learning_rate": 3.439153439153439e-05,
778
- "loss": 0.5844,
779
- "step": 1120
780
- },
781
- {
782
- "epoch": 11.44,
783
- "learning_rate": 3.4202569916855634e-05,
784
- "loss": 0.5715,
785
- "step": 1130
786
- },
787
- {
788
- "epoch": 11.54,
789
- "learning_rate": 3.401360544217687e-05,
790
- "loss": 0.5809,
791
- "step": 1140
792
- },
793
- {
794
- "epoch": 11.65,
795
- "learning_rate": 3.382464096749811e-05,
796
- "loss": 0.5695,
797
- "step": 1150
798
- },
799
- {
800
- "epoch": 11.75,
801
- "learning_rate": 3.363567649281935e-05,
802
- "loss": 0.576,
803
- "step": 1160
804
- },
805
- {
806
- "epoch": 11.85,
807
- "learning_rate": 3.344671201814059e-05,
808
- "loss": 0.5736,
809
- "step": 1170
810
- },
811
- {
812
- "epoch": 11.95,
813
- "learning_rate": 3.325774754346183e-05,
814
- "loss": 0.5729,
815
- "step": 1180
816
- },
817
- {
818
- "epoch": 12.0,
819
- "eval_accuracy": 0.7236889047449162,
820
- "eval_loss": 0.5611264109611511,
821
- "eval_runtime": 268.1746,
822
- "eval_samples_per_second": 41.809,
823
- "eval_steps_per_second": 0.164,
824
- "step": 1185
825
- },
826
- {
827
- "epoch": 12.05,
828
- "learning_rate": 3.306878306878307e-05,
829
- "loss": 0.5775,
830
- "step": 1190
831
- },
832
- {
833
- "epoch": 12.15,
834
- "learning_rate": 3.2879818594104314e-05,
835
- "loss": 0.5774,
836
- "step": 1200
837
- },
838
- {
839
- "epoch": 12.25,
840
- "learning_rate": 3.269085411942555e-05,
841
- "loss": 0.5751,
842
- "step": 1210
843
- },
844
- {
845
- "epoch": 12.35,
846
- "learning_rate": 3.250188964474679e-05,
847
- "loss": 0.5767,
848
- "step": 1220
849
- },
850
- {
851
- "epoch": 12.46,
852
- "learning_rate": 3.231292517006803e-05,
853
- "loss": 0.5717,
854
- "step": 1230
855
- },
856
- {
857
- "epoch": 12.56,
858
- "learning_rate": 3.2123960695389265e-05,
859
- "loss": 0.5738,
860
- "step": 1240
861
- },
862
- {
863
- "epoch": 12.66,
864
- "learning_rate": 3.193499622071051e-05,
865
- "loss": 0.5749,
866
- "step": 1250
867
- },
868
- {
869
- "epoch": 12.76,
870
- "learning_rate": 3.1746031746031745e-05,
871
- "loss": 0.5741,
872
- "step": 1260
873
- },
874
- {
875
- "epoch": 12.86,
876
- "learning_rate": 3.155706727135299e-05,
877
- "loss": 0.5789,
878
- "step": 1270
879
- },
880
- {
881
- "epoch": 12.96,
882
- "learning_rate": 3.1368102796674224e-05,
883
- "loss": 0.577,
884
- "step": 1280
885
- },
886
- {
887
- "epoch": 12.99,
888
- "eval_accuracy": 0.7188726364609347,
889
- "eval_loss": 0.5702112913131714,
890
- "eval_runtime": 267.6574,
891
- "eval_samples_per_second": 41.889,
892
- "eval_steps_per_second": 0.164,
893
- "step": 1283
894
- },
895
- {
896
- "epoch": 13.06,
897
- "learning_rate": 3.117913832199547e-05,
898
- "loss": 0.5767,
899
- "step": 1290
900
- },
901
- {
902
- "epoch": 13.16,
903
- "learning_rate": 3.09901738473167e-05,
904
- "loss": 0.5727,
905
- "step": 1300
906
- },
907
- {
908
- "epoch": 13.27,
909
- "learning_rate": 3.0801209372637946e-05,
910
- "loss": 0.5702,
911
- "step": 1310
912
- },
913
- {
914
- "epoch": 13.37,
915
- "learning_rate": 3.061224489795919e-05,
916
- "loss": 0.5768,
917
- "step": 1320
918
- },
919
- {
920
- "epoch": 13.47,
921
- "learning_rate": 3.0423280423280425e-05,
922
- "loss": 0.5708,
923
- "step": 1330
924
- },
925
- {
926
- "epoch": 13.57,
927
- "learning_rate": 3.0234315948601665e-05,
928
- "loss": 0.5708,
929
- "step": 1340
930
- },
931
- {
932
- "epoch": 13.67,
933
- "learning_rate": 3.0045351473922904e-05,
934
- "loss": 0.5677,
935
- "step": 1350
936
- },
937
- {
938
- "epoch": 13.77,
939
- "learning_rate": 2.9856386999244147e-05,
940
- "loss": 0.5755,
941
- "step": 1360
942
- },
943
- {
944
- "epoch": 13.87,
945
- "learning_rate": 2.9667422524565387e-05,
946
- "loss": 0.5731,
947
- "step": 1370
948
- },
949
- {
950
- "epoch": 13.97,
951
- "learning_rate": 2.947845804988662e-05,
952
- "loss": 0.5702,
953
- "step": 1380
954
- },
955
- {
956
- "epoch": 13.99,
957
- "eval_accuracy": 0.7259186585800927,
958
- "eval_loss": 0.5587979555130005,
959
- "eval_runtime": 267.9851,
960
- "eval_samples_per_second": 41.838,
961
- "eval_steps_per_second": 0.164,
962
- "step": 1382
963
- },
964
- {
965
- "epoch": 14.08,
966
- "learning_rate": 2.928949357520786e-05,
967
- "loss": 0.5713,
968
- "step": 1390
969
- },
970
- {
971
- "epoch": 14.18,
972
- "learning_rate": 2.91005291005291e-05,
973
- "loss": 0.5648,
974
- "step": 1400
975
- },
976
- {
977
- "epoch": 14.28,
978
- "learning_rate": 2.891156462585034e-05,
979
- "loss": 0.5715,
980
- "step": 1410
981
- },
982
- {
983
- "epoch": 14.38,
984
- "learning_rate": 2.872260015117158e-05,
985
- "loss": 0.5711,
986
- "step": 1420
987
- },
988
- {
989
- "epoch": 14.48,
990
- "learning_rate": 2.853363567649282e-05,
991
- "loss": 0.5598,
992
- "step": 1430
993
- },
994
- {
995
- "epoch": 14.58,
996
- "learning_rate": 2.834467120181406e-05,
997
- "loss": 0.5746,
998
- "step": 1440
999
- },
1000
- {
1001
- "epoch": 14.68,
1002
- "learning_rate": 2.81557067271353e-05,
1003
- "loss": 0.5702,
1004
- "step": 1450
1005
- },
1006
- {
1007
- "epoch": 14.78,
1008
- "learning_rate": 2.796674225245654e-05,
1009
- "loss": 0.58,
1010
- "step": 1460
1011
- },
1012
- {
1013
- "epoch": 14.89,
1014
- "learning_rate": 2.777777777777778e-05,
1015
- "loss": 0.5706,
1016
- "step": 1470
1017
- },
1018
- {
1019
- "epoch": 14.99,
1020
- "learning_rate": 2.758881330309902e-05,
1021
- "loss": 0.5717,
1022
- "step": 1480
1023
- },
1024
- {
1025
- "epoch": 15.0,
1026
- "eval_accuracy": 0.7244024259721726,
1027
- "eval_loss": 0.5564510226249695,
1028
- "eval_runtime": 273.2108,
1029
- "eval_samples_per_second": 41.038,
1030
- "eval_steps_per_second": 0.161,
1031
- "step": 1481
1032
- },
1033
- {
1034
- "epoch": 15.09,
1035
- "learning_rate": 2.739984882842026e-05,
1036
- "loss": 0.5678,
1037
- "step": 1490
1038
  },
1039
  {
1040
- "epoch": 15.19,
1041
- "learning_rate": 2.72108843537415e-05,
1042
- "loss": 0.5653,
1043
- "step": 1500
1044
  },
1045
  {
1046
- "epoch": 15.29,
1047
- "learning_rate": 2.7021919879062734e-05,
1048
- "loss": 0.5599,
1049
- "step": 1510
1050
  },
1051
  {
1052
- "epoch": 15.39,
1053
- "learning_rate": 2.6832955404383974e-05,
1054
- "loss": 0.5745,
1055
- "step": 1520
 
 
 
1056
  },
1057
  {
1058
- "epoch": 15.49,
1059
- "learning_rate": 2.6643990929705213e-05,
1060
- "loss": 0.5701,
1061
- "step": 1530
1062
  },
1063
  {
1064
- "epoch": 15.59,
1065
- "learning_rate": 2.6455026455026456e-05,
1066
- "loss": 0.5715,
1067
- "step": 1540
1068
  },
1069
  {
1070
- "epoch": 15.7,
1071
- "learning_rate": 2.6266061980347696e-05,
1072
- "loss": 0.5677,
1073
- "step": 1550
 
 
 
1074
  },
1075
  {
1076
- "epoch": 15.8,
1077
- "learning_rate": 2.6077097505668935e-05,
1078
- "loss": 0.5749,
1079
- "step": 1560
1080
  },
1081
  {
1082
- "epoch": 15.9,
1083
- "learning_rate": 2.5888133030990175e-05,
1084
- "loss": 0.5741,
1085
- "step": 1570
 
 
 
1086
  },
1087
  {
1088
- "epoch": 16.0,
1089
- "learning_rate": 2.5699168556311414e-05,
1090
- "loss": 0.5646,
1091
- "step": 1580
1092
  },
1093
  {
1094
- "epoch": 16.0,
1095
- "eval_accuracy": 0.7302889760970389,
1096
- "eval_loss": 0.5536319017410278,
1097
- "eval_runtime": 270.439,
1098
- "eval_samples_per_second": 41.459,
1099
- "eval_steps_per_second": 0.163,
1100
- "step": 1580
1101
  },
1102
  {
1103
- "epoch": 16.1,
1104
- "learning_rate": 2.5510204081632654e-05,
1105
- "loss": 0.5717,
1106
- "step": 1590
 
 
 
1107
  },
1108
  {
1109
- "epoch": 16.2,
1110
- "learning_rate": 2.5321239606953894e-05,
1111
- "loss": 0.565,
1112
- "step": 1600
1113
  },
1114
  {
1115
- "epoch": 16.3,
1116
- "learning_rate": 2.5132275132275137e-05,
1117
- "loss": 0.5749,
1118
- "step": 1610
1119
  },
1120
  {
1121
- "epoch": 16.41,
1122
- "learning_rate": 2.4943310657596373e-05,
1123
- "loss": 0.5701,
1124
- "step": 1620
 
 
 
1125
  },
1126
  {
1127
- "epoch": 16.51,
1128
- "learning_rate": 2.4754346182917612e-05,
1129
- "loss": 0.5717,
1130
- "step": 1630
1131
  },
1132
  {
1133
- "epoch": 16.61,
1134
- "learning_rate": 2.4565381708238852e-05,
1135
- "loss": 0.5641,
1136
- "step": 1640
1137
  },
1138
  {
1139
- "epoch": 16.71,
1140
- "learning_rate": 2.437641723356009e-05,
1141
- "loss": 0.5663,
1142
- "step": 1650
 
 
 
1143
  },
1144
  {
1145
- "epoch": 16.81,
1146
- "learning_rate": 2.418745275888133e-05,
1147
- "loss": 0.5674,
1148
- "step": 1660
1149
  },
1150
  {
1151
- "epoch": 16.91,
1152
- "learning_rate": 2.399848828420257e-05,
1153
- "loss": 0.5591,
1154
- "step": 1670
 
 
 
1155
  },
1156
  {
1157
- "epoch": 16.99,
1158
- "eval_accuracy": 0.7344809133071709,
1159
- "eval_loss": 0.5525398254394531,
1160
- "eval_runtime": 269.9721,
1161
- "eval_samples_per_second": 41.53,
1162
- "eval_steps_per_second": 0.163,
1163
- "step": 1678
1164
  },
1165
  {
1166
- "epoch": 17.01,
1167
- "learning_rate": 2.380952380952381e-05,
1168
- "loss": 0.5754,
1169
- "step": 1680
1170
  },
1171
  {
1172
- "epoch": 17.11,
1173
- "learning_rate": 2.362055933484505e-05,
1174
- "loss": 0.5734,
1175
- "step": 1690
 
 
 
1176
  },
1177
  {
1178
- "epoch": 17.22,
1179
- "learning_rate": 2.343159486016629e-05,
1180
- "loss": 0.5671,
1181
- "step": 1700
1182
  },
1183
  {
1184
- "epoch": 17.32,
1185
- "learning_rate": 2.324263038548753e-05,
1186
- "loss": 0.5663,
1187
- "step": 1710
1188
  },
1189
  {
1190
- "epoch": 17.42,
1191
- "learning_rate": 2.305366591080877e-05,
1192
- "loss": 0.5707,
1193
- "step": 1720
 
 
 
1194
  },
1195
  {
1196
- "epoch": 17.52,
1197
- "learning_rate": 2.2864701436130008e-05,
1198
- "loss": 0.5635,
1199
- "step": 1730
1200
  },
1201
  {
1202
- "epoch": 17.62,
1203
- "learning_rate": 2.267573696145125e-05,
1204
- "loss": 0.569,
1205
- "step": 1740
 
 
 
1206
  },
1207
  {
1208
- "epoch": 17.72,
1209
- "learning_rate": 2.2486772486772487e-05,
1210
- "loss": 0.5675,
1211
- "step": 1750
1212
  },
1213
  {
1214
- "epoch": 17.82,
1215
- "learning_rate": 2.2297808012093727e-05,
1216
- "loss": 0.5623,
1217
- "step": 1760
1218
  },
1219
  {
1220
- "epoch": 17.92,
1221
- "learning_rate": 2.2108843537414966e-05,
1222
- "loss": 0.5586,
1223
- "step": 1770
 
 
 
1224
  },
1225
  {
1226
- "epoch": 17.99,
1227
- "eval_accuracy": 0.7285943631823046,
1228
- "eval_loss": 0.5565158724784851,
1229
- "eval_runtime": 267.0513,
1230
- "eval_samples_per_second": 41.984,
1231
- "eval_steps_per_second": 0.165,
1232
- "step": 1777
1233
  },
1234
  {
1235
- "epoch": 18.03,
1236
- "learning_rate": 2.1919879062736206e-05,
1237
- "loss": 0.5606,
1238
- "step": 1780
1239
  },
1240
  {
1241
- "epoch": 18.13,
1242
- "learning_rate": 2.1730914588057446e-05,
1243
- "loss": 0.5611,
1244
- "step": 1790
 
 
 
1245
  },
1246
  {
1247
- "epoch": 18.23,
1248
- "learning_rate": 2.1541950113378685e-05,
1249
- "loss": 0.5693,
1250
- "step": 1800
1251
  },
1252
  {
1253
- "epoch": 18.33,
1254
- "learning_rate": 2.1352985638699928e-05,
1255
- "loss": 0.559,
1256
- "step": 1810
1257
  },
1258
  {
1259
- "epoch": 18.43,
1260
- "learning_rate": 2.1164021164021164e-05,
1261
- "loss": 0.5673,
1262
- "step": 1820
 
 
 
1263
  },
1264
  {
1265
- "epoch": 18.53,
1266
- "learning_rate": 2.0975056689342404e-05,
1267
- "loss": 0.5635,
1268
- "step": 1830
1269
  },
1270
  {
1271
- "epoch": 18.63,
1272
- "learning_rate": 2.0786092214663643e-05,
1273
- "loss": 0.567,
1274
- "step": 1840
 
 
 
1275
  },
1276
  {
1277
- "epoch": 18.73,
1278
- "learning_rate": 2.0597127739984883e-05,
1279
- "loss": 0.5637,
1280
- "step": 1850
1281
  },
1282
  {
1283
- "epoch": 18.84,
1284
- "learning_rate": 2.0408163265306123e-05,
1285
- "loss": 0.5648,
1286
- "step": 1860
1287
  },
1288
  {
1289
- "epoch": 18.94,
1290
- "learning_rate": 2.0219198790627365e-05,
1291
- "loss": 0.5668,
1292
- "step": 1870
 
 
 
1293
  },
1294
  {
1295
- "epoch": 19.0,
1296
- "eval_accuracy": 0.7303781662504459,
1297
- "eval_loss": 0.5519587993621826,
1298
- "eval_runtime": 272.9351,
1299
- "eval_samples_per_second": 41.079,
1300
- "eval_steps_per_second": 0.161,
1301
- "step": 1876
1302
  },
1303
  {
1304
- "epoch": 19.04,
1305
- "learning_rate": 2.0030234315948605e-05,
1306
- "loss": 0.5628,
1307
- "step": 1880
1308
  },
1309
  {
1310
- "epoch": 19.14,
1311
- "learning_rate": 1.984126984126984e-05,
1312
- "loss": 0.5642,
1313
- "step": 1890
 
 
 
1314
  },
1315
  {
1316
- "epoch": 19.24,
1317
- "learning_rate": 1.965230536659108e-05,
1318
- "loss": 0.5563,
1319
- "step": 1900
1320
  },
1321
  {
1322
- "epoch": 19.34,
1323
- "learning_rate": 1.946334089191232e-05,
1324
- "loss": 0.5642,
1325
- "step": 1910
1326
  },
1327
  {
1328
- "epoch": 19.44,
1329
- "learning_rate": 1.927437641723356e-05,
1330
- "loss": 0.5618,
1331
- "step": 1920
 
 
 
1332
  },
1333
  {
1334
- "epoch": 19.54,
1335
- "learning_rate": 1.90854119425548e-05,
1336
- "loss": 0.5578,
1337
- "step": 1930
1338
  },
1339
  {
1340
- "epoch": 19.65,
1341
- "learning_rate": 1.8896447467876043e-05,
1342
- "loss": 0.5636,
1343
- "step": 1940
 
 
 
1344
  },
1345
  {
1346
- "epoch": 19.75,
1347
- "learning_rate": 1.8707482993197282e-05,
1348
- "loss": 0.5749,
1349
- "step": 1950
1350
  },
1351
  {
1352
- "epoch": 19.85,
1353
  "learning_rate": 1.8518518518518518e-05,
1354
- "loss": 0.5628,
1355
- "step": 1960
1356
- },
1357
- {
1358
- "epoch": 19.95,
1359
- "learning_rate": 1.8329554043839758e-05,
1360
- "loss": 0.5617,
1361
- "step": 1970
1362
  },
1363
  {
1364
  "epoch": 20.0,
1365
- "eval_accuracy": 0.7288619336425258,
1366
- "eval_loss": 0.5557389855384827,
1367
- "eval_runtime": 270.9737,
1368
- "eval_samples_per_second": 41.377,
1369
- "eval_steps_per_second": 0.162,
1370
- "step": 1975
1371
- },
1372
- {
1373
- "epoch": 20.05,
1374
- "learning_rate": 1.8140589569160997e-05,
1375
- "loss": 0.5541,
1376
- "step": 1980
1377
- },
1378
- {
1379
- "epoch": 20.15,
1380
- "learning_rate": 1.7951625094482237e-05,
1381
- "loss": 0.5693,
1382
- "step": 1990
1383
- },
1384
- {
1385
- "epoch": 20.25,
1386
- "learning_rate": 1.776266061980348e-05,
1387
- "loss": 0.5652,
1388
- "step": 2000
1389
  },
1390
  {
1391
- "epoch": 20.35,
1392
- "learning_rate": 1.757369614512472e-05,
1393
- "loss": 0.5579,
1394
- "step": 2010
1395
- },
1396
- {
1397
- "epoch": 20.46,
1398
- "learning_rate": 1.7384731670445956e-05,
1399
- "loss": 0.5597,
1400
- "step": 2020
1401
- },
1402
- {
1403
- "epoch": 20.56,
1404
- "learning_rate": 1.7195767195767195e-05,
1405
- "loss": 0.5674,
1406
- "step": 2030
1407
- },
1408
- {
1409
- "epoch": 20.66,
1410
- "learning_rate": 1.7006802721088435e-05,
1411
- "loss": 0.5634,
1412
- "step": 2040
1413
- },
1414
- {
1415
- "epoch": 20.76,
1416
- "learning_rate": 1.6817838246409674e-05,
1417
- "loss": 0.567,
1418
- "step": 2050
1419
- },
1420
- {
1421
- "epoch": 20.86,
1422
- "learning_rate": 1.6628873771730914e-05,
1423
- "loss": 0.5571,
1424
- "step": 2060
1425
  },
1426
  {
1427
- "epoch": 20.96,
1428
- "learning_rate": 1.6439909297052157e-05,
1429
- "loss": 0.5546,
1430
- "step": 2070
1431
  },
1432
  {
1433
  "epoch": 20.99,
1434
- "eval_accuracy": 0.7325187299322155,
1435
- "eval_loss": 0.5561436414718628,
1436
- "eval_runtime": 269.5801,
1437
- "eval_samples_per_second": 41.591,
1438
- "eval_steps_per_second": 0.163,
1439
- "step": 2073
1440
- },
1441
- {
1442
- "epoch": 21.06,
1443
- "learning_rate": 1.6250944822373397e-05,
1444
- "loss": 0.5589,
1445
- "step": 2080
1446
- },
1447
- {
1448
- "epoch": 21.16,
1449
- "learning_rate": 1.6061980347694633e-05,
1450
- "loss": 0.5658,
1451
- "step": 2090
1452
  },
1453
  {
1454
- "epoch": 21.27,
1455
- "learning_rate": 1.5873015873015872e-05,
1456
- "loss": 0.5596,
1457
- "step": 2100
1458
- },
1459
- {
1460
- "epoch": 21.37,
1461
- "learning_rate": 1.5684051398337112e-05,
1462
- "loss": 0.5618,
1463
- "step": 2110
1464
- },
1465
- {
1466
- "epoch": 21.47,
1467
- "learning_rate": 1.549508692365835e-05,
1468
- "loss": 0.5624,
1469
- "step": 2120
1470
- },
1471
- {
1472
- "epoch": 21.57,
1473
- "learning_rate": 1.5306122448979594e-05,
1474
- "loss": 0.5619,
1475
- "step": 2130
1476
- },
1477
- {
1478
- "epoch": 21.67,
1479
- "learning_rate": 1.5117157974300832e-05,
1480
- "loss": 0.5554,
1481
- "step": 2140
1482
- },
1483
- {
1484
- "epoch": 21.77,
1485
- "learning_rate": 1.4928193499622074e-05,
1486
- "loss": 0.5539,
1487
- "step": 2150
1488
- },
1489
- {
1490
- "epoch": 21.87,
1491
- "learning_rate": 1.473922902494331e-05,
1492
- "loss": 0.5657,
1493
- "step": 2160
1494
  },
1495
  {
1496
  "epoch": 21.97,
1497
- "learning_rate": 1.455026455026455e-05,
1498
- "loss": 0.5579,
1499
- "step": 2170
1500
- },
1501
- {
1502
- "epoch": 21.99,
1503
- "eval_accuracy": 0.7314484480913307,
1504
- "eval_loss": 0.5537222623825073,
1505
- "eval_runtime": 267.4411,
1506
- "eval_samples_per_second": 41.923,
1507
- "eval_steps_per_second": 0.165,
1508
- "step": 2172
1509
- },
1510
- {
1511
- "epoch": 22.08,
1512
- "learning_rate": 1.436130007558579e-05,
1513
- "loss": 0.554,
1514
- "step": 2180
1515
- },
1516
- {
1517
- "epoch": 22.18,
1518
- "learning_rate": 1.417233560090703e-05,
1519
- "loss": 0.569,
1520
- "step": 2190
1521
- },
1522
- {
1523
- "epoch": 22.28,
1524
- "learning_rate": 1.398337112622827e-05,
1525
- "loss": 0.5528,
1526
- "step": 2200
1527
- },
1528
- {
1529
- "epoch": 22.38,
1530
- "learning_rate": 1.379440665154951e-05,
1531
- "loss": 0.5649,
1532
- "step": 2210
1533
- },
1534
- {
1535
- "epoch": 22.48,
1536
- "learning_rate": 1.360544217687075e-05,
1537
- "loss": 0.5586,
1538
- "step": 2220
1539
- },
1540
- {
1541
- "epoch": 22.58,
1542
- "learning_rate": 1.3416477702191987e-05,
1543
- "loss": 0.5606,
1544
- "step": 2230
1545
- },
1546
- {
1547
- "epoch": 22.68,
1548
- "learning_rate": 1.3227513227513228e-05,
1549
- "loss": 0.5551,
1550
- "step": 2240
1551
- },
1552
- {
1553
- "epoch": 22.78,
1554
- "learning_rate": 1.3038548752834468e-05,
1555
- "loss": 0.5588,
1556
- "step": 2250
1557
- },
1558
- {
1559
- "epoch": 22.89,
1560
- "learning_rate": 1.2849584278155707e-05,
1561
- "loss": 0.5591,
1562
- "step": 2260
1563
  },
1564
  {
1565
- "epoch": 22.99,
1566
- "learning_rate": 1.2660619803476947e-05,
1567
- "loss": 0.5604,
1568
- "step": 2270
1569
  },
1570
  {
1571
- "epoch": 23.0,
1572
- "eval_accuracy": 0.72904031394934,
1573
- "eval_loss": 0.5545207262039185,
1574
- "eval_runtime": 270.2678,
1575
- "eval_samples_per_second": 41.485,
1576
- "eval_steps_per_second": 0.163,
1577
- "step": 2271
1578
  },
1579
  {
1580
- "epoch": 23.09,
1581
- "learning_rate": 1.2471655328798186e-05,
1582
- "loss": 0.5588,
1583
- "step": 2280
 
 
 
1584
  },
1585
  {
1586
  "epoch": 23.19,
1587
- "learning_rate": 1.2282690854119426e-05,
1588
- "loss": 0.5499,
1589
- "step": 2290
1590
- },
1591
- {
1592
- "epoch": 23.29,
1593
- "learning_rate": 1.2093726379440666e-05,
1594
- "loss": 0.5495,
1595
- "step": 2300
1596
- },
1597
- {
1598
- "epoch": 23.39,
1599
- "learning_rate": 1.1904761904761905e-05,
1600
- "loss": 0.5571,
1601
- "step": 2310
1602
- },
1603
- {
1604
- "epoch": 23.49,
1605
- "learning_rate": 1.1715797430083145e-05,
1606
- "loss": 0.5545,
1607
- "step": 2320
1608
- },
1609
- {
1610
- "epoch": 23.59,
1611
- "learning_rate": 1.1526832955404384e-05,
1612
- "loss": 0.5625,
1613
- "step": 2330
1614
- },
1615
- {
1616
- "epoch": 23.7,
1617
- "learning_rate": 1.1337868480725626e-05,
1618
- "loss": 0.5573,
1619
- "step": 2340
1620
- },
1621
- {
1622
- "epoch": 23.8,
1623
- "learning_rate": 1.1148904006046863e-05,
1624
- "loss": 0.5623,
1625
- "step": 2350
1626
- },
1627
- {
1628
- "epoch": 23.9,
1629
- "learning_rate": 1.0959939531368103e-05,
1630
- "loss": 0.5527,
1631
- "step": 2360
1632
  },
1633
  {
1634
- "epoch": 24.0,
1635
- "learning_rate": 1.0770975056689343e-05,
1636
- "loss": 0.5563,
1637
- "step": 2370
1638
  },
1639
  {
1640
  "epoch": 24.0,
1641
- "eval_accuracy": 0.7287727434891188,
1642
- "eval_loss": 0.5590741634368896,
1643
- "eval_runtime": 269.4039,
1644
- "eval_samples_per_second": 41.618,
1645
- "eval_steps_per_second": 0.163,
1646
- "step": 2370
1647
- },
1648
- {
1649
- "epoch": 24.1,
1650
- "learning_rate": 1.0582010582010582e-05,
1651
- "loss": 0.5586,
1652
- "step": 2380
1653
- },
1654
- {
1655
- "epoch": 24.2,
1656
- "learning_rate": 1.0393046107331822e-05,
1657
- "loss": 0.5424,
1658
- "step": 2390
1659
- },
1660
- {
1661
- "epoch": 24.3,
1662
- "learning_rate": 1.0204081632653061e-05,
1663
- "loss": 0.5598,
1664
- "step": 2400
1665
- },
1666
- {
1667
- "epoch": 24.41,
1668
- "learning_rate": 1.0015117157974303e-05,
1669
- "loss": 0.5528,
1670
- "step": 2410
1671
- },
1672
- {
1673
- "epoch": 24.51,
1674
- "learning_rate": 9.82615268329554e-06,
1675
- "loss": 0.5521,
1676
- "step": 2420
1677
- },
1678
- {
1679
- "epoch": 24.61,
1680
- "learning_rate": 9.63718820861678e-06,
1681
- "loss": 0.5524,
1682
- "step": 2430
1683
  },
1684
  {
1685
- "epoch": 24.71,
1686
- "learning_rate": 9.448223733938021e-06,
1687
- "loss": 0.5602,
1688
- "step": 2440
1689
- },
1690
- {
1691
- "epoch": 24.81,
1692
- "learning_rate": 9.259259259259259e-06,
1693
- "loss": 0.5608,
1694
- "step": 2450
1695
  },
1696
  {
1697
- "epoch": 24.91,
1698
- "learning_rate": 9.070294784580499e-06,
1699
- "loss": 0.5634,
1700
- "step": 2460
1701
  },
1702
  {
1703
  "epoch": 24.99,
1704
- "eval_accuracy": 0.7307349268640742,
1705
- "eval_loss": 0.5545657277107239,
1706
- "eval_runtime": 273.5576,
1707
- "eval_samples_per_second": 40.986,
1708
- "eval_steps_per_second": 0.161,
1709
- "step": 2468
1710
- },
1711
- {
1712
- "epoch": 25.01,
1713
- "learning_rate": 8.88133030990174e-06,
1714
- "loss": 0.5516,
1715
- "step": 2470
1716
- },
1717
- {
1718
- "epoch": 25.11,
1719
- "learning_rate": 8.692365835222978e-06,
1720
- "loss": 0.5489,
1721
- "step": 2480
1722
- },
1723
- {
1724
- "epoch": 25.22,
1725
- "learning_rate": 8.503401360544217e-06,
1726
- "loss": 0.5562,
1727
- "step": 2490
1728
- },
1729
- {
1730
- "epoch": 25.32,
1731
- "learning_rate": 8.314436885865457e-06,
1732
- "loss": 0.5498,
1733
- "step": 2500
1734
- },
1735
- {
1736
- "epoch": 25.42,
1737
- "learning_rate": 8.125472411186698e-06,
1738
- "loss": 0.5578,
1739
- "step": 2510
1740
- },
1741
- {
1742
- "epoch": 25.52,
1743
- "learning_rate": 7.936507936507936e-06,
1744
- "loss": 0.551,
1745
- "step": 2520
1746
- },
1747
- {
1748
- "epoch": 25.62,
1749
- "learning_rate": 7.747543461829176e-06,
1750
- "loss": 0.5572,
1751
- "step": 2530
1752
- },
1753
- {
1754
- "epoch": 25.72,
1755
- "learning_rate": 7.558578987150416e-06,
1756
- "loss": 0.5523,
1757
- "step": 2540
1758
- },
1759
- {
1760
- "epoch": 25.82,
1761
- "learning_rate": 7.369614512471655e-06,
1762
- "loss": 0.5525,
1763
- "step": 2550
1764
- },
1765
- {
1766
- "epoch": 25.92,
1767
- "learning_rate": 7.180650037792895e-06,
1768
- "loss": 0.5563,
1769
- "step": 2560
1770
- },
1771
- {
1772
- "epoch": 25.99,
1773
- "eval_accuracy": 0.7302889760970389,
1774
- "eval_loss": 0.5556601285934448,
1775
- "eval_runtime": 270.2761,
1776
- "eval_samples_per_second": 41.484,
1777
- "eval_steps_per_second": 0.163,
1778
- "step": 2567
1779
- },
1780
- {
1781
- "epoch": 26.03,
1782
- "learning_rate": 6.991685563114135e-06,
1783
- "loss": 0.5607,
1784
- "step": 2570
1785
- },
1786
- {
1787
- "epoch": 26.13,
1788
- "learning_rate": 6.802721088435375e-06,
1789
- "loss": 0.5555,
1790
- "step": 2580
1791
- },
1792
- {
1793
- "epoch": 26.23,
1794
- "learning_rate": 6.613756613756614e-06,
1795
- "loss": 0.5527,
1796
- "step": 2590
1797
- },
1798
- {
1799
- "epoch": 26.33,
1800
- "learning_rate": 6.424792139077854e-06,
1801
- "loss": 0.5549,
1802
- "step": 2600
1803
- },
1804
- {
1805
- "epoch": 26.43,
1806
- "learning_rate": 6.235827664399093e-06,
1807
- "loss": 0.5435,
1808
- "step": 2610
1809
- },
1810
- {
1811
- "epoch": 26.53,
1812
- "learning_rate": 6.046863189720333e-06,
1813
- "loss": 0.5511,
1814
- "step": 2620
1815
- },
1816
- {
1817
- "epoch": 26.63,
1818
- "learning_rate": 5.857898715041572e-06,
1819
- "loss": 0.5629,
1820
- "step": 2630
1821
- },
1822
- {
1823
- "epoch": 26.73,
1824
- "learning_rate": 5.668934240362813e-06,
1825
- "loss": 0.5527,
1826
- "step": 2640
1827
- },
1828
- {
1829
- "epoch": 26.84,
1830
- "learning_rate": 5.4799697656840515e-06,
1831
- "loss": 0.5541,
1832
- "step": 2650
1833
- },
1834
- {
1835
- "epoch": 26.94,
1836
- "learning_rate": 5.291005291005291e-06,
1837
- "loss": 0.5563,
1838
- "step": 2660
1839
- },
1840
- {
1841
- "epoch": 27.0,
1842
- "eval_accuracy": 0.727613271494827,
1843
- "eval_loss": 0.5571199655532837,
1844
- "eval_runtime": 269.8955,
1845
- "eval_samples_per_second": 41.542,
1846
- "eval_steps_per_second": 0.163,
1847
- "step": 2666
1848
- },
1849
- {
1850
- "epoch": 27.04,
1851
- "learning_rate": 5.102040816326531e-06,
1852
- "loss": 0.553,
1853
- "step": 2670
1854
- },
1855
- {
1856
- "epoch": 27.14,
1857
- "learning_rate": 4.91307634164777e-06,
1858
- "loss": 0.5502,
1859
- "step": 2680
1860
- },
1861
- {
1862
- "epoch": 27.24,
1863
- "learning_rate": 4.724111866969011e-06,
1864
- "loss": 0.55,
1865
- "step": 2690
1866
  },
1867
  {
1868
- "epoch": 27.34,
1869
- "learning_rate": 4.535147392290249e-06,
1870
- "loss": 0.5527,
1871
- "step": 2700
1872
  },
1873
  {
1874
- "epoch": 27.44,
1875
- "learning_rate": 4.346182917611489e-06,
1876
- "loss": 0.5462,
1877
- "step": 2710
 
 
 
1878
  },
1879
  {
1880
- "epoch": 27.54,
1881
- "learning_rate": 4.1572184429327285e-06,
1882
- "loss": 0.5502,
1883
- "step": 2720
1884
  },
1885
  {
1886
- "epoch": 27.65,
1887
- "learning_rate": 3.968253968253968e-06,
1888
- "loss": 0.5533,
1889
- "step": 2730
1890
  },
1891
  {
1892
- "epoch": 27.75,
1893
- "learning_rate": 3.779289493575208e-06,
1894
- "loss": 0.5585,
1895
- "step": 2740
 
 
 
1896
  },
1897
  {
1898
- "epoch": 27.85,
1899
- "learning_rate": 3.5903250188964477e-06,
1900
- "loss": 0.556,
1901
- "step": 2750
1902
  },
1903
  {
1904
- "epoch": 27.95,
1905
- "learning_rate": 3.4013605442176877e-06,
1906
- "loss": 0.5544,
1907
- "step": 2760
1908
  },
1909
  {
1910
  "epoch": 28.0,
1911
- "eval_accuracy": 0.7298430253300036,
1912
- "eval_loss": 0.5550753474235535,
1913
- "eval_runtime": 273.4563,
1914
- "eval_samples_per_second": 41.001,
1915
- "eval_steps_per_second": 0.161,
1916
- "step": 2765
1917
- },
1918
- {
1919
- "epoch": 28.05,
1920
- "learning_rate": 3.212396069538927e-06,
1921
- "loss": 0.553,
1922
- "step": 2770
1923
- },
1924
- {
1925
- "epoch": 28.15,
1926
- "learning_rate": 3.0234315948601664e-06,
1927
- "loss": 0.5534,
1928
- "step": 2780
1929
  },
1930
  {
1931
- "epoch": 28.25,
1932
- "learning_rate": 2.8344671201814064e-06,
1933
- "loss": 0.5488,
1934
- "step": 2790
1935
- },
1936
- {
1937
- "epoch": 28.35,
1938
- "learning_rate": 2.6455026455026455e-06,
1939
- "loss": 0.5553,
1940
- "step": 2800
1941
- },
1942
- {
1943
- "epoch": 28.46,
1944
- "learning_rate": 2.456538170823885e-06,
1945
- "loss": 0.5503,
1946
- "step": 2810
1947
- },
1948
- {
1949
- "epoch": 28.56,
1950
- "learning_rate": 2.2675736961451247e-06,
1951
- "loss": 0.5493,
1952
- "step": 2820
1953
- },
1954
- {
1955
- "epoch": 28.66,
1956
- "learning_rate": 2.0786092214663643e-06,
1957
- "loss": 0.5474,
1958
- "step": 2830
1959
- },
1960
- {
1961
- "epoch": 28.76,
1962
- "learning_rate": 1.889644746787604e-06,
1963
- "loss": 0.5477,
1964
- "step": 2840
1965
- },
1966
- {
1967
- "epoch": 28.86,
1968
- "learning_rate": 1.7006802721088438e-06,
1969
- "loss": 0.5565,
1970
- "step": 2850
1971
- },
1972
- {
1973
- "epoch": 28.96,
1974
- "learning_rate": 1.5117157974300832e-06,
1975
- "loss": 0.5491,
1976
- "step": 2860
1977
  },
1978
  {
1979
  "epoch": 28.99,
1980
- "eval_accuracy": 0.7282376025686764,
1981
- "eval_loss": 0.5596103668212891,
1982
- "eval_runtime": 270.7488,
1983
- "eval_samples_per_second": 41.411,
1984
- "eval_steps_per_second": 0.163,
1985
- "step": 2863
1986
- },
1987
- {
1988
- "epoch": 29.06,
1989
- "learning_rate": 1.3227513227513228e-06,
1990
- "loss": 0.5508,
1991
- "step": 2870
1992
- },
1993
- {
1994
- "epoch": 29.16,
1995
- "learning_rate": 1.1337868480725623e-06,
1996
- "loss": 0.5513,
1997
- "step": 2880
1998
- },
1999
- {
2000
- "epoch": 29.27,
2001
- "learning_rate": 9.44822373393802e-07,
2002
- "loss": 0.5517,
2003
- "step": 2890
2004
- },
2005
- {
2006
- "epoch": 29.37,
2007
- "learning_rate": 7.558578987150416e-07,
2008
- "loss": 0.5575,
2009
- "step": 2900
2010
  },
2011
  {
2012
- "epoch": 29.47,
2013
- "learning_rate": 5.668934240362812e-07,
2014
- "loss": 0.5519,
2015
- "step": 2910
 
 
 
2016
  },
2017
  {
2018
  "epoch": 29.57,
2019
- "learning_rate": 3.779289493575208e-07,
2020
- "loss": 0.5549,
2021
- "step": 2920
2022
  },
2023
  {
2024
- "epoch": 29.67,
2025
- "learning_rate": 1.889644746787604e-07,
2026
- "loss": 0.5478,
2027
- "step": 2930
 
 
 
2028
  },
2029
  {
2030
- "epoch": 29.77,
2031
- "learning_rate": 0.0,
2032
- "loss": 0.5461,
2033
- "step": 2940
2034
- },
2035
- {
2036
- "epoch": 29.77,
2037
- "eval_accuracy": 0.7302889760970389,
2038
- "eval_loss": 0.5574254989624023,
2039
- "eval_runtime": 267.0764,
2040
- "eval_samples_per_second": 41.98,
2041
- "eval_steps_per_second": 0.165,
2042
- "step": 2940
2043
- },
2044
- {
2045
- "epoch": 29.77,
2046
- "step": 2940,
2047
- "total_flos": 7.46776315809736e+19,
2048
- "train_loss": 0.5755378539870385,
2049
- "train_runtime": 78104.0618,
2050
- "train_samples_per_second": 38.759,
2051
- "train_steps_per_second": 0.038
2052
  }
2053
  ],
2054
- "max_steps": 2940,
2055
  "num_train_epochs": 30,
2056
- "total_flos": 7.46776315809736e+19,
2057
  "trial_name": null,
2058
  "trial_params": null
2059
  }
 
1
  {
2
+ "best_metric": 0.8001030396702731,
3
+ "best_model_checkpoint": "swin-tiny-patch4-window7-224-finetuned-eurosat/checkpoint-310",
4
+ "epoch": 29.565217391304348,
5
+ "global_step": 510,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 0.58,
12
+ "learning_rate": 9.803921568627451e-06,
13
+ "loss": 0.6794,
14
  "step": 10
15
  },
16
  {
17
+ "epoch": 0.99,
18
+ "eval_accuracy": 0.7047913446676971,
19
+ "eval_loss": 0.5848240256309509,
20
+ "eval_runtime": 9.3525,
21
+ "eval_samples_per_second": 207.538,
22
+ "eval_steps_per_second": 0.855,
23
+ "step": 17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  },
25
  {
26
+ "epoch": 1.16,
27
+ "learning_rate": 1.9607843137254903e-05,
28
+ "loss": 0.6308,
29
+ "step": 20
30
  },
31
  {
32
+ "epoch": 1.74,
33
+ "learning_rate": 2.9411764705882354e-05,
34
+ "loss": 0.5835,
35
+ "step": 30
36
  },
37
  {
38
+ "epoch": 1.97,
39
+ "eval_accuracy": 0.7429160226687275,
40
+ "eval_loss": 0.5206928253173828,
41
+ "eval_runtime": 9.455,
42
+ "eval_samples_per_second": 205.288,
43
+ "eval_steps_per_second": 0.846,
44
+ "step": 34
45
  },
46
  {
47
+ "epoch": 2.32,
48
+ "learning_rate": 3.9215686274509805e-05,
49
+ "loss": 0.5523,
50
+ "step": 40
51
  },
52
  {
53
+ "epoch": 2.9,
54
+ "learning_rate": 4.901960784313725e-05,
55
+ "loss": 0.5257,
56
+ "step": 50
57
  },
58
  {
59
+ "epoch": 2.96,
60
+ "eval_accuracy": 0.7640391550747038,
61
+ "eval_loss": 0.4912124574184418,
62
+ "eval_runtime": 9.367,
63
+ "eval_samples_per_second": 207.218,
64
+ "eval_steps_per_second": 0.854,
65
+ "step": 51
66
  },
67
  {
68
+ "epoch": 3.48,
69
+ "learning_rate": 4.901960784313725e-05,
70
+ "loss": 0.5128,
71
+ "step": 60
72
  },
73
  {
74
+ "epoch": 4.0,
75
+ "eval_accuracy": 0.7743431221020093,
76
+ "eval_loss": 0.46713554859161377,
77
+ "eval_runtime": 9.3663,
78
+ "eval_samples_per_second": 207.232,
79
+ "eval_steps_per_second": 0.854,
80
+ "step": 69
81
  },
82
  {
83
+ "epoch": 4.06,
84
+ "learning_rate": 4.793028322440087e-05,
85
+ "loss": 0.5028,
86
+ "step": 70
87
  },
88
  {
89
+ "epoch": 4.64,
90
+ "learning_rate": 4.684095860566449e-05,
91
+ "loss": 0.4963,
92
+ "step": 80
 
 
 
93
  },
94
  {
95
+ "epoch": 4.99,
96
+ "eval_accuracy": 0.7599175682637815,
97
+ "eval_loss": 0.479315847158432,
98
+ "eval_runtime": 9.3717,
99
+ "eval_samples_per_second": 207.114,
100
+ "eval_steps_per_second": 0.854,
101
+ "step": 86
102
  },
103
  {
104
+ "epoch": 5.22,
105
+ "learning_rate": 4.5751633986928104e-05,
106
+ "loss": 0.4965,
107
+ "step": 90
108
  },
109
  {
110
+ "epoch": 5.8,
111
+ "learning_rate": 4.466230936819172e-05,
112
+ "loss": 0.5005,
113
+ "step": 100
114
  },
115
  {
116
+ "epoch": 5.97,
117
+ "eval_accuracy": 0.7650695517774343,
118
+ "eval_loss": 0.485215961933136,
119
+ "eval_runtime": 9.3698,
120
+ "eval_samples_per_second": 207.154,
121
+ "eval_steps_per_second": 0.854,
122
+ "step": 103
123
  },
124
  {
125
+ "epoch": 6.38,
126
+ "learning_rate": 4.357298474945534e-05,
127
+ "loss": 0.4963,
128
+ "step": 110
129
  },
130
  {
131
+ "epoch": 6.96,
132
+ "learning_rate": 4.2483660130718954e-05,
133
+ "loss": 0.4748,
134
+ "step": 120
135
  },
136
  {
137
+ "epoch": 6.96,
138
+ "eval_accuracy": 0.768160741885626,
139
+ "eval_loss": 0.4841272234916687,
140
+ "eval_runtime": 9.371,
141
+ "eval_samples_per_second": 207.128,
142
+ "eval_steps_per_second": 0.854,
143
+ "step": 120
144
  },
145
  {
146
+ "epoch": 7.54,
147
+ "learning_rate": 4.1394335511982573e-05,
148
+ "loss": 0.4732,
149
+ "step": 130
150
  },
151
  {
152
+ "epoch": 8.0,
153
+ "eval_accuracy": 0.7748583204533745,
154
+ "eval_loss": 0.4430771768093109,
155
+ "eval_runtime": 9.3845,
156
+ "eval_samples_per_second": 206.83,
157
+ "eval_steps_per_second": 0.852,
158
+ "step": 138
159
  },
160
  {
161
+ "epoch": 8.12,
162
+ "learning_rate": 4.0305010893246186e-05,
163
+ "loss": 0.4842,
164
+ "step": 140
 
 
 
165
  },
166
  {
167
+ "epoch": 8.7,
168
+ "learning_rate": 3.9215686274509805e-05,
169
+ "loss": 0.4614,
170
+ "step": 150
171
  },
172
  {
173
+ "epoch": 8.99,
174
+ "eval_accuracy": 0.7856774858320453,
175
+ "eval_loss": 0.4429071843624115,
176
+ "eval_runtime": 9.3915,
177
+ "eval_samples_per_second": 206.677,
178
+ "eval_steps_per_second": 0.852,
179
+ "step": 155
180
  },
181
  {
182
+ "epoch": 9.28,
183
+ "learning_rate": 3.8126361655773424e-05,
184
+ "loss": 0.4645,
185
+ "step": 160
186
  },
187
  {
188
+ "epoch": 9.86,
189
+ "learning_rate": 3.7037037037037037e-05,
190
+ "loss": 0.4647,
191
+ "step": 170
192
  },
193
  {
194
+ "epoch": 9.97,
195
+ "eval_accuracy": 0.7717671303451829,
196
+ "eval_loss": 0.4715192914009094,
197
+ "eval_runtime": 9.3758,
198
+ "eval_samples_per_second": 207.023,
199
+ "eval_steps_per_second": 0.853,
200
+ "step": 172
201
  },
202
  {
203
+ "epoch": 10.43,
204
+ "learning_rate": 3.5947712418300656e-05,
205
+ "loss": 0.4601,
206
+ "step": 180
207
  },
208
  {
209
+ "epoch": 10.96,
210
+ "eval_accuracy": 0.7753735188047398,
211
+ "eval_loss": 0.4722177982330322,
212
+ "eval_runtime": 9.3838,
213
+ "eval_samples_per_second": 206.847,
214
+ "eval_steps_per_second": 0.853,
215
+ "step": 189
216
  },
217
  {
218
+ "epoch": 11.01,
219
+ "learning_rate": 3.4858387799564275e-05,
220
+ "loss": 0.4496,
221
+ "step": 190
222
  },
223
  {
224
+ "epoch": 11.59,
225
+ "learning_rate": 3.376906318082789e-05,
226
+ "loss": 0.4626,
227
+ "step": 200
228
  },
229
  {
230
+ "epoch": 12.0,
231
+ "eval_accuracy": 0.7872230808861411,
232
+ "eval_loss": 0.44434165954589844,
233
+ "eval_runtime": 9.3667,
234
+ "eval_samples_per_second": 207.223,
235
+ "eval_steps_per_second": 0.854,
236
+ "step": 207
237
  },
238
  {
239
+ "epoch": 12.17,
240
+ "learning_rate": 3.2679738562091506e-05,
241
+ "loss": 0.4447,
242
+ "step": 210
 
 
 
243
  },
244
  {
245
+ "epoch": 12.75,
246
+ "learning_rate": 3.1590413943355126e-05,
247
+ "loss": 0.4526,
248
+ "step": 220
249
  },
250
  {
251
+ "epoch": 12.99,
252
+ "eval_accuracy": 0.7908294693456981,
253
+ "eval_loss": 0.4368865489959717,
254
+ "eval_runtime": 9.3607,
255
+ "eval_samples_per_second": 207.356,
256
+ "eval_steps_per_second": 0.855,
257
+ "step": 224
258
  },
259
  {
260
+ "epoch": 13.33,
261
+ "learning_rate": 3.0501089324618738e-05,
262
+ "loss": 0.447,
263
+ "step": 230
264
  },
265
  {
266
+ "epoch": 13.91,
267
+ "learning_rate": 2.9411764705882354e-05,
268
+ "loss": 0.4494,
269
+ "step": 240
270
  },
271
  {
272
+ "epoch": 13.97,
273
+ "eval_accuracy": 0.7970118495620814,
274
+ "eval_loss": 0.42481139302253723,
275
+ "eval_runtime": 9.3639,
276
+ "eval_samples_per_second": 207.286,
277
+ "eval_steps_per_second": 0.854,
278
+ "step": 241
279
  },
280
  {
281
+ "epoch": 14.49,
282
+ "learning_rate": 2.832244008714597e-05,
283
+ "loss": 0.458,
284
+ "step": 250
285
  },
286
  {
287
+ "epoch": 14.96,
288
+ "eval_accuracy": 0.7867078825347759,
289
+ "eval_loss": 0.4402971863746643,
290
+ "eval_runtime": 9.4177,
291
+ "eval_samples_per_second": 206.101,
292
+ "eval_steps_per_second": 0.849,
293
+ "step": 258
294
  },
295
  {
296
+ "epoch": 15.07,
297
+ "learning_rate": 2.7233115468409585e-05,
298
+ "loss": 0.4501,
299
+ "step": 260
300
  },
301
  {
302
+ "epoch": 15.65,
303
+ "learning_rate": 2.6143790849673204e-05,
304
+ "loss": 0.4387,
305
+ "step": 270
306
  },
307
  {
308
+ "epoch": 16.0,
309
+ "eval_accuracy": 0.7964966512107161,
310
+ "eval_loss": 0.4251194894313812,
311
+ "eval_runtime": 9.3671,
312
+ "eval_samples_per_second": 207.214,
313
+ "eval_steps_per_second": 0.854,
314
+ "step": 276
315
  },
316
  {
317
+ "epoch": 16.23,
318
+ "learning_rate": 2.5054466230936817e-05,
319
+ "loss": 0.4448,
320
+ "step": 280
 
 
 
321
  },
322
  {
323
+ "epoch": 16.81,
324
+ "learning_rate": 2.3965141612200436e-05,
325
+ "loss": 0.4314,
326
+ "step": 290
327
  },
328
  {
329
+ "epoch": 16.99,
330
+ "eval_accuracy": 0.7980422462648119,
331
+ "eval_loss": 0.4256560206413269,
332
+ "eval_runtime": 9.3785,
333
+ "eval_samples_per_second": 206.963,
334
+ "eval_steps_per_second": 0.853,
335
+ "step": 293
336
  },
337
  {
338
+ "epoch": 17.39,
339
+ "learning_rate": 2.2875816993464052e-05,
340
+ "loss": 0.4354,
341
+ "step": 300
342
  },
343
  {
344
+ "epoch": 17.97,
345
+ "learning_rate": 2.178649237472767e-05,
346
+ "loss": 0.432,
347
+ "step": 310
348
  },
349
  {
350
+ "epoch": 17.97,
351
+ "eval_accuracy": 0.8001030396702731,
352
+ "eval_loss": 0.42466819286346436,
353
+ "eval_runtime": 9.3749,
354
+ "eval_samples_per_second": 207.041,
355
+ "eval_steps_per_second": 0.853,
356
+ "step": 310
357
  },
358
  {
359
+ "epoch": 18.55,
360
+ "learning_rate": 2.0697167755991287e-05,
361
+ "loss": 0.4372,
362
+ "step": 320
363
  },
364
  {
365
+ "epoch": 18.96,
366
+ "eval_accuracy": 0.7856774858320453,
367
+ "eval_loss": 0.44541600346565247,
368
+ "eval_runtime": 9.3667,
369
+ "eval_samples_per_second": 207.222,
370
+ "eval_steps_per_second": 0.854,
371
+ "step": 327
372
  },
373
  {
374
+ "epoch": 19.13,
375
+ "learning_rate": 1.9607843137254903e-05,
376
+ "loss": 0.4298,
377
+ "step": 330
378
  },
379
  {
380
+ "epoch": 19.71,
381
  "learning_rate": 1.8518518518518518e-05,
382
+ "loss": 0.434,
383
+ "step": 340
 
 
 
 
 
 
384
  },
385
  {
386
  "epoch": 20.0,
387
+ "eval_accuracy": 0.7903142709943328,
388
+ "eval_loss": 0.43263283371925354,
389
+ "eval_runtime": 9.367,
390
+ "eval_samples_per_second": 207.217,
391
+ "eval_steps_per_second": 0.854,
392
+ "step": 345
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
393
  },
394
  {
395
+ "epoch": 20.29,
396
+ "learning_rate": 1.7429193899782137e-05,
397
+ "loss": 0.4257,
398
+ "step": 350
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
399
  },
400
  {
401
+ "epoch": 20.87,
402
+ "learning_rate": 1.6339869281045753e-05,
403
+ "loss": 0.4313,
404
+ "step": 360
405
  },
406
  {
407
  "epoch": 20.99,
408
+ "eval_accuracy": 0.7959814528593508,
409
+ "eval_loss": 0.4253856837749481,
410
+ "eval_runtime": 9.3553,
411
+ "eval_samples_per_second": 207.475,
412
+ "eval_steps_per_second": 0.855,
413
+ "step": 362
 
 
 
 
 
 
 
 
 
 
 
 
414
  },
415
  {
416
+ "epoch": 21.45,
417
+ "learning_rate": 1.5250544662309369e-05,
418
+ "loss": 0.421,
419
+ "step": 370
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
420
  },
421
  {
422
  "epoch": 21.97,
423
+ "eval_accuracy": 0.7970118495620814,
424
+ "eval_loss": 0.42772090435028076,
425
+ "eval_runtime": 9.3638,
426
+ "eval_samples_per_second": 207.288,
427
+ "eval_steps_per_second": 0.854,
428
+ "step": 379
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
429
  },
430
  {
431
+ "epoch": 22.03,
432
+ "learning_rate": 1.4161220043572985e-05,
433
+ "loss": 0.4207,
434
+ "step": 380
435
  },
436
  {
437
+ "epoch": 22.61,
438
+ "learning_rate": 1.3071895424836602e-05,
439
+ "loss": 0.4272,
440
+ "step": 390
 
 
 
441
  },
442
  {
443
+ "epoch": 22.96,
444
+ "eval_accuracy": 0.7980422462648119,
445
+ "eval_loss": 0.4280939996242523,
446
+ "eval_runtime": 9.3579,
447
+ "eval_samples_per_second": 207.419,
448
+ "eval_steps_per_second": 0.855,
449
+ "step": 396
450
  },
451
  {
452
  "epoch": 23.19,
453
+ "learning_rate": 1.1982570806100218e-05,
454
+ "loss": 0.4185,
455
+ "step": 400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
456
  },
457
  {
458
+ "epoch": 23.77,
459
+ "learning_rate": 1.0893246187363835e-05,
460
+ "loss": 0.4245,
461
+ "step": 410
462
  },
463
  {
464
  "epoch": 24.0,
465
+ "eval_accuracy": 0.7959814528593508,
466
+ "eval_loss": 0.4282556176185608,
467
+ "eval_runtime": 9.3525,
468
+ "eval_samples_per_second": 207.539,
469
+ "eval_steps_per_second": 0.855,
470
+ "step": 414
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
471
  },
472
  {
473
+ "epoch": 24.35,
474
+ "learning_rate": 9.803921568627451e-06,
475
+ "loss": 0.4179,
476
+ "step": 420
 
 
 
 
 
 
477
  },
478
  {
479
+ "epoch": 24.93,
480
+ "learning_rate": 8.714596949891069e-06,
481
+ "loss": 0.4207,
482
+ "step": 430
483
  },
484
  {
485
  "epoch": 24.99,
486
+ "eval_accuracy": 0.7892838742916023,
487
+ "eval_loss": 0.4409063458442688,
488
+ "eval_runtime": 9.3603,
489
+ "eval_samples_per_second": 207.365,
490
+ "eval_steps_per_second": 0.855,
491
+ "step": 431
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
492
  },
493
  {
494
+ "epoch": 25.51,
495
+ "learning_rate": 7.6252723311546845e-06,
496
+ "loss": 0.4134,
497
+ "step": 440
498
  },
499
  {
500
+ "epoch": 25.97,
501
+ "eval_accuracy": 0.7964966512107161,
502
+ "eval_loss": 0.43111562728881836,
503
+ "eval_runtime": 9.3664,
504
+ "eval_samples_per_second": 207.229,
505
+ "eval_steps_per_second": 0.854,
506
+ "step": 448
507
  },
508
  {
509
+ "epoch": 26.09,
510
+ "learning_rate": 6.535947712418301e-06,
511
+ "loss": 0.4216,
512
+ "step": 450
513
  },
514
  {
515
+ "epoch": 26.67,
516
+ "learning_rate": 5.446623093681918e-06,
517
+ "loss": 0.4157,
518
+ "step": 460
519
  },
520
  {
521
+ "epoch": 26.96,
522
+ "eval_accuracy": 0.794435857805255,
523
+ "eval_loss": 0.43559595942497253,
524
+ "eval_runtime": 9.3485,
525
+ "eval_samples_per_second": 207.628,
526
+ "eval_steps_per_second": 0.856,
527
+ "step": 465
528
  },
529
  {
530
+ "epoch": 27.25,
531
+ "learning_rate": 4.357298474945534e-06,
532
+ "loss": 0.4235,
533
+ "step": 470
534
  },
535
  {
536
+ "epoch": 27.83,
537
+ "learning_rate": 3.2679738562091506e-06,
538
+ "loss": 0.4207,
539
+ "step": 480
540
  },
541
  {
542
  "epoch": 28.0,
543
+ "eval_accuracy": 0.7980422462648119,
544
+ "eval_loss": 0.4301479756832123,
545
+ "eval_runtime": 9.3663,
546
+ "eval_samples_per_second": 207.233,
547
+ "eval_steps_per_second": 0.854,
548
+ "step": 483
 
 
 
 
 
 
 
 
 
 
 
 
549
  },
550
  {
551
+ "epoch": 28.41,
552
+ "learning_rate": 2.178649237472767e-06,
553
+ "loss": 0.4128,
554
+ "step": 490
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
555
  },
556
  {
557
  "epoch": 28.99,
558
+ "learning_rate": 1.0893246187363836e-06,
559
+ "loss": 0.4104,
560
+ "step": 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
561
  },
562
  {
563
+ "epoch": 28.99,
564
+ "eval_accuracy": 0.7954662545079856,
565
+ "eval_loss": 0.4339882433414459,
566
+ "eval_runtime": 9.3665,
567
+ "eval_samples_per_second": 207.228,
568
+ "eval_steps_per_second": 0.854,
569
+ "step": 500
570
  },
571
  {
572
  "epoch": 29.57,
573
+ "learning_rate": 0.0,
574
+ "loss": 0.4187,
575
+ "step": 510
576
  },
577
  {
578
+ "epoch": 29.57,
579
+ "eval_accuracy": 0.7954662545079856,
580
+ "eval_loss": 0.4336837828159332,
581
+ "eval_runtime": 9.3744,
582
+ "eval_samples_per_second": 207.053,
583
+ "eval_steps_per_second": 0.853,
584
+ "step": 510
585
  },
586
  {
587
+ "epoch": 29.57,
588
+ "step": 510,
589
+ "total_flos": 1.2835906128174883e+19,
590
+ "train_loss": 0.46080853518317727,
591
+ "train_runtime": 3932.5078,
592
+ "train_samples_per_second": 133.22,
593
+ "train_steps_per_second": 0.13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
594
  }
595
  ],
596
+ "max_steps": 510,
597
  "num_train_epochs": 30,
598
+ "total_flos": 1.2835906128174883e+19,
599
  "trial_name": null,
600
  "trial_params": null
601
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:adb5d63c15208349c3aa5cc56038343a6be9f981d8851ddd77aeda88be337956
3
  size 4027
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef939cd59d81be9f37bfadc055ef8cab25db668087412c32c6927dcb41799913
3
  size 4027