ALM-AHME commited on
Commit
c7616b1
1 Parent(s): 8215e3b

End of training

Browse files
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 4.96,
3
- "eval_accuracy": 0.9995570321151717,
4
- "eval_loss": 0.001114627462811768,
5
- "eval_runtime": 195.9028,
6
- "eval_samples_per_second": 23.047,
7
- "eval_steps_per_second": 1.445,
8
  "total_flos": 8.138660625246413e+18,
9
- "train_loss": 0.10703883119408161,
10
- "train_runtime": 7147.9833,
11
- "train_samples_per_second": 4.186,
12
- "train_steps_per_second": 0.065
13
  }
 
1
  {
2
  "epoch": 4.96,
3
+ "eval_accuracy": 0.9805094130675526,
4
+ "eval_loss": 0.04738219827413559,
5
+ "eval_runtime": 194.8962,
6
+ "eval_samples_per_second": 23.166,
7
+ "eval_steps_per_second": 1.452,
8
  "total_flos": 8.138660625246413e+18,
9
+ "train_loss": 0.23329446437538312,
10
+ "train_runtime": 4933.7001,
11
+ "train_samples_per_second": 6.065,
12
+ "train_steps_per_second": 0.094
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 4.96,
3
- "eval_accuracy": 0.9995570321151717,
4
- "eval_loss": 0.001114627462811768,
5
- "eval_runtime": 195.9028,
6
- "eval_samples_per_second": 23.047,
7
- "eval_steps_per_second": 1.445
8
  }
 
1
  {
2
  "epoch": 4.96,
3
+ "eval_accuracy": 0.9805094130675526,
4
+ "eval_loss": 0.04738219827413559,
5
+ "eval_runtime": 194.8962,
6
+ "eval_samples_per_second": 23.166,
7
+ "eval_steps_per_second": 1.452
8
  }
runs/Jul11_19-14-19_15fcd4b534d1/events.out.tfevents.1689108114.15fcd4b534d1.2772.5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a2dfdaf22c4157f99130fcec28a4cbc20d89775c2f999c1ce6fc839a0368254
3
+ size 411
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 4.96,
3
  "total_flos": 8.138660625246413e+18,
4
- "train_loss": 0.10703883119408161,
5
- "train_runtime": 7147.9833,
6
- "train_samples_per_second": 4.186,
7
- "train_steps_per_second": 0.065
8
  }
 
1
  {
2
  "epoch": 4.96,
3
  "total_flos": 8.138660625246413e+18,
4
+ "train_loss": 0.23329446437538312,
5
+ "train_runtime": 4933.7001,
6
+ "train_samples_per_second": 6.065,
7
+ "train_steps_per_second": 0.094
8
  }
trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.9995570321151717,
3
  "best_model_checkpoint": "beit-large-patch16-224-finetuned-LungCancer-Classification-LC25000-AH-40-30-30/checkpoint-465",
4
  "epoch": 4.96,
5
  "global_step": 465,
@@ -9,615 +9,615 @@
9
  "log_history": [
10
  {
11
  "epoch": 0.05,
12
- "learning_rate": 1.0729613733905579e-06,
13
- "loss": 1.1839,
14
  "step": 5
15
  },
16
  {
17
  "epoch": 0.11,
18
- "learning_rate": 2.1459227467811158e-06,
19
- "loss": 0.9886,
20
  "step": 10
21
  },
22
  {
23
  "epoch": 0.16,
24
- "learning_rate": 3.218884120171674e-06,
25
- "loss": 0.742,
26
  "step": 15
27
  },
28
  {
29
  "epoch": 0.21,
30
- "learning_rate": 4.2918454935622316e-06,
31
- "loss": 0.4725,
32
  "step": 20
33
  },
34
  {
35
  "epoch": 0.27,
36
- "learning_rate": 5.36480686695279e-06,
37
- "loss": 0.2834,
38
  "step": 25
39
  },
40
  {
41
  "epoch": 0.32,
42
- "learning_rate": 6.437768240343348e-06,
43
- "loss": 0.2146,
44
  "step": 30
45
  },
46
  {
47
  "epoch": 0.37,
48
- "learning_rate": 7.510729613733906e-06,
49
- "loss": 0.1667,
50
  "step": 35
51
  },
52
  {
53
  "epoch": 0.43,
54
- "learning_rate": 8.583690987124463e-06,
55
- "loss": 0.129,
56
  "step": 40
57
  },
58
  {
59
  "epoch": 0.48,
60
- "learning_rate": 9.65665236051502e-06,
61
- "loss": 0.1718,
62
  "step": 45
63
  },
64
  {
65
  "epoch": 0.53,
66
- "learning_rate": 1.072961373390558e-05,
67
- "loss": 0.1387,
68
  "step": 50
69
  },
70
  {
71
  "epoch": 0.59,
72
- "learning_rate": 1.1802575107296138e-05,
73
- "loss": 0.1188,
74
  "step": 55
75
  },
76
  {
77
  "epoch": 0.64,
78
- "learning_rate": 1.2875536480686696e-05,
79
- "loss": 0.0936,
80
  "step": 60
81
  },
82
  {
83
  "epoch": 0.69,
84
- "learning_rate": 1.3948497854077253e-05,
85
- "loss": 0.1547,
86
  "step": 65
87
  },
88
  {
89
  "epoch": 0.75,
90
- "learning_rate": 1.5021459227467813e-05,
91
- "loss": 0.1528,
92
  "step": 70
93
  },
94
  {
95
  "epoch": 0.8,
96
- "learning_rate": 1.609442060085837e-05,
97
- "loss": 0.1509,
98
  "step": 75
99
  },
100
  {
101
  "epoch": 0.85,
102
- "learning_rate": 1.7167381974248926e-05,
103
- "loss": 0.1046,
104
  "step": 80
105
  },
106
  {
107
  "epoch": 0.91,
108
- "learning_rate": 1.8240343347639486e-05,
109
- "loss": 0.1003,
110
  "step": 85
111
  },
112
  {
113
  "epoch": 0.96,
114
- "learning_rate": 1.931330472103004e-05,
115
- "loss": 0.0508,
116
  "step": 90
117
  },
118
  {
119
  "epoch": 0.99,
120
- "eval_accuracy": 0.9756367663344407,
121
- "eval_loss": 0.06337086111307144,
122
- "eval_runtime": 1162.9787,
123
- "eval_samples_per_second": 3.882,
124
- "eval_steps_per_second": 0.243,
125
  "step": 93
126
  },
127
  {
128
  "epoch": 1.01,
129
- "learning_rate": 2.0386266094420604e-05,
130
- "loss": 0.0488,
131
  "step": 95
132
  },
133
  {
134
  "epoch": 1.07,
135
- "learning_rate": 2.145922746781116e-05,
136
- "loss": 0.0625,
137
  "step": 100
138
  },
139
  {
140
  "epoch": 1.12,
141
- "learning_rate": 2.2532188841201716e-05,
142
- "loss": 0.1041,
143
  "step": 105
144
  },
145
  {
146
  "epoch": 1.17,
147
- "learning_rate": 2.3605150214592276e-05,
148
- "loss": 0.096,
149
  "step": 110
150
  },
151
  {
152
  "epoch": 1.23,
153
- "learning_rate": 2.467811158798283e-05,
154
- "loss": 0.0872,
155
  "step": 115
156
  },
157
  {
158
  "epoch": 1.28,
159
- "learning_rate": 2.575107296137339e-05,
160
- "loss": 0.119,
161
  "step": 120
162
  },
163
  {
164
  "epoch": 1.33,
165
- "learning_rate": 2.6824034334763947e-05,
166
- "loss": 0.0861,
167
  "step": 125
168
  },
169
  {
170
  "epoch": 1.39,
171
- "learning_rate": 2.7896995708154506e-05,
172
- "loss": 0.1535,
173
  "step": 130
174
  },
175
  {
176
  "epoch": 1.44,
177
- "learning_rate": 2.896995708154507e-05,
178
- "loss": 0.1654,
179
  "step": 135
180
  },
181
  {
182
  "epoch": 1.49,
183
- "learning_rate": 3.0042918454935625e-05,
184
- "loss": 0.0787,
185
  "step": 140
186
  },
187
  {
188
  "epoch": 1.55,
189
- "learning_rate": 3.1115879828326185e-05,
190
- "loss": 0.0531,
191
  "step": 145
192
  },
193
  {
194
  "epoch": 1.6,
195
- "learning_rate": 3.218884120171674e-05,
196
- "loss": 0.0825,
197
  "step": 150
198
  },
199
  {
200
  "epoch": 1.65,
201
- "learning_rate": 3.3261802575107297e-05,
202
- "loss": 0.0922,
203
  "step": 155
204
  },
205
  {
206
  "epoch": 1.71,
207
- "learning_rate": 3.433476394849785e-05,
208
- "loss": 0.0717,
209
  "step": 160
210
  },
211
  {
212
  "epoch": 1.76,
213
- "learning_rate": 3.5407725321888415e-05,
214
- "loss": 0.0787,
215
  "step": 165
216
  },
217
  {
218
  "epoch": 1.81,
219
- "learning_rate": 3.648068669527897e-05,
220
- "loss": 0.0244,
221
  "step": 170
222
  },
223
  {
224
  "epoch": 1.87,
225
- "learning_rate": 3.755364806866953e-05,
226
- "loss": 0.0837,
227
  "step": 175
228
  },
229
  {
230
  "epoch": 1.92,
231
- "learning_rate": 3.862660944206008e-05,
232
- "loss": 0.0572,
233
  "step": 180
234
  },
235
  {
236
  "epoch": 1.97,
237
- "learning_rate": 3.9699570815450646e-05,
238
- "loss": 0.0909,
239
  "step": 185
240
  },
241
  {
242
  "epoch": 1.99,
243
- "eval_accuracy": 0.991140642303433,
244
- "eval_loss": 0.022178057581186295,
245
- "eval_runtime": 195.0995,
246
- "eval_samples_per_second": 23.142,
247
- "eval_steps_per_second": 1.451,
248
  "step": 187
249
  },
250
  {
251
  "epoch": 2.03,
252
- "learning_rate": 4.077253218884121e-05,
253
- "loss": 0.0909,
254
  "step": 190
255
  },
256
  {
257
  "epoch": 2.08,
258
- "learning_rate": 4.1845493562231765e-05,
259
- "loss": 0.0431,
260
  "step": 195
261
  },
262
  {
263
  "epoch": 2.13,
264
- "learning_rate": 4.291845493562232e-05,
265
- "loss": 0.0511,
266
  "step": 200
267
  },
268
  {
269
  "epoch": 2.19,
270
- "learning_rate": 4.399141630901288e-05,
271
- "loss": 0.0787,
272
  "step": 205
273
  },
274
  {
275
  "epoch": 2.24,
276
- "learning_rate": 4.506437768240343e-05,
277
- "loss": 0.0467,
278
  "step": 210
279
  },
280
  {
281
  "epoch": 2.29,
282
- "learning_rate": 4.6137339055793995e-05,
283
- "loss": 0.1184,
284
  "step": 215
285
  },
286
  {
287
  "epoch": 2.35,
288
- "learning_rate": 4.721030042918455e-05,
289
- "loss": 0.1344,
290
  "step": 220
291
  },
292
  {
293
  "epoch": 2.4,
294
- "learning_rate": 4.828326180257511e-05,
295
- "loss": 0.1384,
296
  "step": 225
297
  },
298
  {
299
  "epoch": 2.45,
300
- "learning_rate": 4.935622317596566e-05,
301
- "loss": 0.0953,
302
  "step": 230
303
  },
304
  {
305
  "epoch": 2.51,
306
- "learning_rate": 4.9568965517241384e-05,
307
- "loss": 0.0667,
308
  "step": 235
309
  },
310
  {
311
  "epoch": 2.56,
312
- "learning_rate": 4.849137931034483e-05,
313
- "loss": 0.0576,
314
  "step": 240
315
  },
316
  {
317
  "epoch": 2.61,
318
- "learning_rate": 4.741379310344828e-05,
319
- "loss": 0.1106,
320
  "step": 245
321
  },
322
  {
323
  "epoch": 2.67,
324
- "learning_rate": 4.633620689655173e-05,
325
- "loss": 0.1455,
326
  "step": 250
327
  },
328
  {
329
  "epoch": 2.72,
330
- "learning_rate": 4.5258620689655176e-05,
331
- "loss": 0.0161,
332
  "step": 255
333
  },
334
  {
335
  "epoch": 2.77,
336
- "learning_rate": 4.418103448275862e-05,
337
- "loss": 0.0959,
338
  "step": 260
339
  },
340
  {
341
  "epoch": 2.83,
342
- "learning_rate": 4.3103448275862066e-05,
343
- "loss": 0.1851,
344
  "step": 265
345
  },
346
  {
347
  "epoch": 2.88,
348
- "learning_rate": 4.202586206896552e-05,
349
- "loss": 0.0497,
350
  "step": 270
351
  },
352
  {
353
  "epoch": 2.93,
354
- "learning_rate": 4.094827586206897e-05,
355
- "loss": 0.0799,
356
  "step": 275
357
  },
358
  {
359
  "epoch": 2.99,
360
- "learning_rate": 3.9870689655172416e-05,
361
- "loss": 0.0641,
362
  "step": 280
363
  },
364
  {
365
  "epoch": 3.0,
366
- "eval_accuracy": 0.9913621262458472,
367
- "eval_loss": 0.022775284945964813,
368
- "eval_runtime": 195.3033,
369
- "eval_samples_per_second": 23.118,
370
- "eval_steps_per_second": 1.449,
371
  "step": 281
372
  },
373
  {
374
  "epoch": 3.04,
375
- "learning_rate": 3.8793103448275865e-05,
376
- "loss": 0.0663,
377
  "step": 285
378
  },
379
  {
380
  "epoch": 3.09,
381
- "learning_rate": 3.771551724137931e-05,
382
- "loss": 0.045,
383
  "step": 290
384
  },
385
  {
386
  "epoch": 3.15,
387
- "learning_rate": 3.663793103448276e-05,
388
- "loss": 0.0787,
389
  "step": 295
390
  },
391
  {
392
  "epoch": 3.2,
393
- "learning_rate": 3.556034482758621e-05,
394
- "loss": 0.04,
395
  "step": 300
396
  },
397
  {
398
  "epoch": 3.25,
399
- "learning_rate": 3.4482758620689657e-05,
400
- "loss": 0.0534,
401
  "step": 305
402
  },
403
  {
404
  "epoch": 3.31,
405
- "learning_rate": 3.3405172413793105e-05,
406
- "loss": 0.0578,
407
  "step": 310
408
  },
409
  {
410
  "epoch": 3.36,
411
- "learning_rate": 3.232758620689655e-05,
412
- "loss": 0.021,
413
  "step": 315
414
  },
415
  {
416
  "epoch": 3.41,
417
- "learning_rate": 3.125e-05,
418
- "loss": 0.0138,
419
  "step": 320
420
  },
421
  {
422
  "epoch": 3.47,
423
- "learning_rate": 3.017241379310345e-05,
424
- "loss": 0.0461,
425
  "step": 325
426
  },
427
  {
428
  "epoch": 3.52,
429
- "learning_rate": 2.9094827586206897e-05,
430
- "loss": 0.0791,
431
  "step": 330
432
  },
433
  {
434
  "epoch": 3.57,
435
- "learning_rate": 2.8017241379310345e-05,
436
- "loss": 0.0544,
437
  "step": 335
438
  },
439
  {
440
  "epoch": 3.63,
441
- "learning_rate": 2.6939655172413796e-05,
442
- "loss": 0.0455,
443
  "step": 340
444
  },
445
  {
446
  "epoch": 3.68,
447
- "learning_rate": 2.5862068965517244e-05,
448
- "loss": 0.0334,
449
  "step": 345
450
  },
451
  {
452
  "epoch": 3.73,
453
- "learning_rate": 2.4784482758620692e-05,
454
- "loss": 0.0099,
455
  "step": 350
456
  },
457
  {
458
  "epoch": 3.79,
459
- "learning_rate": 2.370689655172414e-05,
460
- "loss": 0.0274,
461
  "step": 355
462
  },
463
  {
464
  "epoch": 3.84,
465
- "learning_rate": 2.2629310344827588e-05,
466
- "loss": 0.0298,
467
  "step": 360
468
  },
469
  {
470
  "epoch": 3.89,
471
- "learning_rate": 2.1551724137931033e-05,
472
- "loss": 0.0238,
473
  "step": 365
474
  },
475
  {
476
  "epoch": 3.95,
477
- "learning_rate": 2.0474137931034484e-05,
478
- "loss": 0.025,
479
  "step": 370
480
  },
481
  {
482
  "epoch": 4.0,
483
- "learning_rate": 1.9396551724137932e-05,
484
- "loss": 0.0717,
485
  "step": 375
486
  },
487
  {
488
  "epoch": 4.0,
489
- "eval_accuracy": 0.9982281284606866,
490
- "eval_loss": 0.005040737800300121,
491
- "eval_runtime": 195.0025,
492
- "eval_samples_per_second": 23.154,
493
- "eval_steps_per_second": 1.451,
494
  "step": 375
495
  },
496
  {
497
  "epoch": 4.05,
498
- "learning_rate": 1.831896551724138e-05,
499
- "loss": 0.0287,
500
  "step": 380
501
  },
502
  {
503
  "epoch": 4.11,
504
- "learning_rate": 1.7241379310344828e-05,
505
- "loss": 0.0194,
506
  "step": 385
507
  },
508
  {
509
  "epoch": 4.16,
510
- "learning_rate": 1.6163793103448276e-05,
511
- "loss": 0.0255,
512
  "step": 390
513
  },
514
  {
515
  "epoch": 4.21,
516
- "learning_rate": 1.5086206896551724e-05,
517
- "loss": 0.0315,
518
  "step": 395
519
  },
520
  {
521
  "epoch": 4.27,
522
- "learning_rate": 1.4008620689655172e-05,
523
- "loss": 0.0677,
524
  "step": 400
525
  },
526
  {
527
  "epoch": 4.32,
528
- "learning_rate": 1.2931034482758622e-05,
529
- "loss": 0.0137,
530
  "step": 405
531
  },
532
  {
533
  "epoch": 4.37,
534
- "learning_rate": 1.185344827586207e-05,
535
- "loss": 0.0165,
536
  "step": 410
537
  },
538
  {
539
  "epoch": 4.43,
540
- "learning_rate": 1.0775862068965516e-05,
541
- "loss": 0.0133,
542
  "step": 415
543
  },
544
  {
545
  "epoch": 4.48,
546
- "learning_rate": 9.698275862068966e-06,
547
- "loss": 0.0235,
548
  "step": 420
549
  },
550
  {
551
  "epoch": 4.53,
552
- "learning_rate": 8.620689655172414e-06,
553
- "loss": 0.0158,
554
  "step": 425
555
  },
556
  {
557
  "epoch": 4.59,
558
- "learning_rate": 7.543103448275862e-06,
559
- "loss": 0.0428,
560
  "step": 430
561
  },
562
  {
563
  "epoch": 4.64,
564
- "learning_rate": 6.465517241379311e-06,
565
- "loss": 0.04,
566
  "step": 435
567
  },
568
  {
569
  "epoch": 4.69,
570
- "learning_rate": 5.387931034482758e-06,
571
- "loss": 0.0048,
572
  "step": 440
573
  },
574
  {
575
  "epoch": 4.75,
576
- "learning_rate": 4.310344827586207e-06,
577
- "loss": 0.0242,
578
  "step": 445
579
  },
580
  {
581
  "epoch": 4.8,
582
- "learning_rate": 3.2327586206896555e-06,
583
- "loss": 0.0141,
584
  "step": 450
585
  },
586
  {
587
  "epoch": 4.85,
588
- "learning_rate": 2.1551724137931035e-06,
589
- "loss": 0.0076,
590
  "step": 455
591
  },
592
  {
593
  "epoch": 4.91,
594
- "learning_rate": 1.0775862068965518e-06,
595
- "loss": 0.0206,
596
  "step": 460
597
  },
598
  {
599
  "epoch": 4.96,
600
  "learning_rate": 0.0,
601
- "loss": 0.0012,
602
  "step": 465
603
  },
604
  {
605
  "epoch": 4.96,
606
- "eval_accuracy": 0.9995570321151717,
607
- "eval_loss": 0.001114627462811768,
608
- "eval_runtime": 194.5559,
609
- "eval_samples_per_second": 23.207,
610
- "eval_steps_per_second": 1.455,
611
  "step": 465
612
  },
613
  {
614
  "epoch": 4.96,
615
  "step": 465,
616
  "total_flos": 8.138660625246413e+18,
617
- "train_loss": 0.10703883119408161,
618
- "train_runtime": 7147.9833,
619
- "train_samples_per_second": 4.186,
620
- "train_steps_per_second": 0.065
621
  }
622
  ],
623
  "max_steps": 465,
 
1
  {
2
+ "best_metric": 0.9805094130675526,
3
  "best_model_checkpoint": "beit-large-patch16-224-finetuned-LungCancer-Classification-LC25000-AH-40-30-30/checkpoint-465",
4
  "epoch": 4.96,
5
  "global_step": 465,
 
9
  "log_history": [
10
  {
11
  "epoch": 0.05,
12
+ "learning_rate": 1.072961373390558e-05,
13
+ "loss": 0.8808,
14
  "step": 5
15
  },
16
  {
17
  "epoch": 0.11,
18
+ "learning_rate": 2.145922746781116e-05,
19
+ "loss": 0.3702,
20
  "step": 10
21
  },
22
  {
23
  "epoch": 0.16,
24
+ "learning_rate": 3.2188841201716734e-05,
25
+ "loss": 0.2654,
26
  "step": 15
27
  },
28
  {
29
  "epoch": 0.21,
30
+ "learning_rate": 4.291845493562232e-05,
31
+ "loss": 0.2283,
32
  "step": 20
33
  },
34
  {
35
  "epoch": 0.27,
36
+ "learning_rate": 5.36480686695279e-05,
37
+ "loss": 0.1849,
38
  "step": 25
39
  },
40
  {
41
  "epoch": 0.32,
42
+ "learning_rate": 6.437768240343347e-05,
43
+ "loss": 0.1441,
44
  "step": 30
45
  },
46
  {
47
  "epoch": 0.37,
48
+ "learning_rate": 7.510729613733907e-05,
49
+ "loss": 0.4052,
50
  "step": 35
51
  },
52
  {
53
  "epoch": 0.43,
54
+ "learning_rate": 8.583690987124464e-05,
55
+ "loss": 0.1893,
56
  "step": 40
57
  },
58
  {
59
  "epoch": 0.48,
60
+ "learning_rate": 9.656652360515021e-05,
61
+ "loss": 0.1277,
62
  "step": 45
63
  },
64
  {
65
  "epoch": 0.53,
66
+ "learning_rate": 0.0001072961373390558,
67
+ "loss": 0.0835,
68
  "step": 50
69
  },
70
  {
71
  "epoch": 0.59,
72
+ "learning_rate": 0.00011802575107296138,
73
+ "loss": 0.3402,
74
  "step": 55
75
  },
76
  {
77
  "epoch": 0.64,
78
+ "learning_rate": 0.00012875536480686693,
79
+ "loss": 0.2402,
80
  "step": 60
81
  },
82
  {
83
  "epoch": 0.69,
84
+ "learning_rate": 0.00013948497854077252,
85
+ "loss": 0.1546,
86
  "step": 65
87
  },
88
  {
89
  "epoch": 0.75,
90
+ "learning_rate": 0.00015021459227467814,
91
+ "loss": 0.1784,
92
  "step": 70
93
  },
94
  {
95
  "epoch": 0.8,
96
+ "learning_rate": 0.0001609442060085837,
97
+ "loss": 0.3537,
98
  "step": 75
99
  },
100
  {
101
  "epoch": 0.85,
102
+ "learning_rate": 0.00017167381974248928,
103
+ "loss": 0.2355,
104
  "step": 80
105
  },
106
  {
107
  "epoch": 0.91,
108
+ "learning_rate": 0.00018240343347639484,
109
+ "loss": 0.2231,
110
  "step": 85
111
  },
112
  {
113
  "epoch": 0.96,
114
+ "learning_rate": 0.00019313304721030043,
115
+ "loss": 0.2312,
116
  "step": 90
117
  },
118
  {
119
  "epoch": 0.99,
120
+ "eval_accuracy": 0.9452934662236988,
121
+ "eval_loss": 0.18224409222602844,
122
+ "eval_runtime": 194.4559,
123
+ "eval_samples_per_second": 23.219,
124
+ "eval_steps_per_second": 1.455,
125
  "step": 93
126
  },
127
  {
128
  "epoch": 1.01,
129
+ "learning_rate": 0.00020386266094420602,
130
+ "loss": 0.1662,
131
  "step": 95
132
  },
133
  {
134
  "epoch": 1.07,
135
+ "learning_rate": 0.0002145922746781116,
136
+ "loss": 0.2914,
137
  "step": 100
138
  },
139
  {
140
  "epoch": 1.12,
141
+ "learning_rate": 0.00022532188841201716,
142
+ "loss": 0.3028,
143
  "step": 105
144
  },
145
  {
146
  "epoch": 1.17,
147
+ "learning_rate": 0.00023605150214592275,
148
+ "loss": 0.2727,
149
  "step": 110
150
  },
151
  {
152
  "epoch": 1.23,
153
+ "learning_rate": 0.0002467811158798283,
154
+ "loss": 0.1559,
155
  "step": 115
156
  },
157
  {
158
  "epoch": 1.28,
159
+ "learning_rate": 0.00025751072961373387,
160
+ "loss": 0.2356,
161
  "step": 120
162
  },
163
  {
164
  "epoch": 1.33,
165
+ "learning_rate": 0.0002682403433476395,
166
+ "loss": 0.1691,
167
  "step": 125
168
  },
169
  {
170
  "epoch": 1.39,
171
+ "learning_rate": 0.00027896995708154504,
172
+ "loss": 0.1779,
173
  "step": 130
174
  },
175
  {
176
  "epoch": 1.44,
177
+ "learning_rate": 0.00028969957081545066,
178
+ "loss": 0.242,
179
  "step": 135
180
  },
181
  {
182
  "epoch": 1.49,
183
+ "learning_rate": 0.00030042918454935627,
184
+ "loss": 0.4023,
185
  "step": 140
186
  },
187
  {
188
  "epoch": 1.55,
189
+ "learning_rate": 0.00031115879828326183,
190
+ "loss": 0.2509,
191
  "step": 145
192
  },
193
  {
194
  "epoch": 1.6,
195
+ "learning_rate": 0.0003218884120171674,
196
+ "loss": 0.4505,
197
  "step": 150
198
  },
199
  {
200
  "epoch": 1.65,
201
+ "learning_rate": 0.00033261802575107295,
202
+ "loss": 0.2701,
203
  "step": 155
204
  },
205
  {
206
  "epoch": 1.71,
207
+ "learning_rate": 0.00034334763948497857,
208
+ "loss": 0.2335,
209
  "step": 160
210
  },
211
  {
212
  "epoch": 1.76,
213
+ "learning_rate": 0.0003540772532188841,
214
+ "loss": 0.2924,
215
  "step": 165
216
  },
217
  {
218
  "epoch": 1.81,
219
+ "learning_rate": 0.0003648068669527897,
220
+ "loss": 0.2749,
221
  "step": 170
222
  },
223
  {
224
  "epoch": 1.87,
225
+ "learning_rate": 0.00037553648068669525,
226
+ "loss": 0.4105,
227
  "step": 175
228
  },
229
  {
230
  "epoch": 1.92,
231
+ "learning_rate": 0.00038626609442060086,
232
+ "loss": 0.4208,
233
  "step": 180
234
  },
235
  {
236
  "epoch": 1.97,
237
+ "learning_rate": 0.0003969957081545064,
238
+ "loss": 0.3817,
239
  "step": 185
240
  },
241
  {
242
  "epoch": 1.99,
243
+ "eval_accuracy": 0.9182724252491694,
244
+ "eval_loss": 0.210577130317688,
245
+ "eval_runtime": 194.2162,
246
+ "eval_samples_per_second": 23.247,
247
+ "eval_steps_per_second": 1.457,
248
  "step": 187
249
  },
250
  {
251
  "epoch": 2.03,
252
+ "learning_rate": 0.00040772532188841203,
253
+ "loss": 0.457,
254
  "step": 190
255
  },
256
  {
257
  "epoch": 2.08,
258
+ "learning_rate": 0.00041845493562231765,
259
+ "loss": 0.2274,
260
  "step": 195
261
  },
262
  {
263
  "epoch": 2.13,
264
+ "learning_rate": 0.0004291845493562232,
265
+ "loss": 0.192,
266
  "step": 200
267
  },
268
  {
269
  "epoch": 2.19,
270
+ "learning_rate": 0.00043991416309012877,
271
+ "loss": 0.344,
272
  "step": 205
273
  },
274
  {
275
  "epoch": 2.24,
276
+ "learning_rate": 0.0004506437768240343,
277
+ "loss": 0.227,
278
  "step": 210
279
  },
280
  {
281
  "epoch": 2.29,
282
+ "learning_rate": 0.00046137339055793994,
283
+ "loss": 0.4637,
284
  "step": 215
285
  },
286
  {
287
  "epoch": 2.35,
288
+ "learning_rate": 0.0004721030042918455,
289
+ "loss": 0.2279,
290
  "step": 220
291
  },
292
  {
293
  "epoch": 2.4,
294
+ "learning_rate": 0.00048283261802575106,
295
+ "loss": 0.2592,
296
  "step": 225
297
  },
298
  {
299
  "epoch": 2.45,
300
+ "learning_rate": 0.0004935622317596566,
301
+ "loss": 0.1698,
302
  "step": 230
303
  },
304
  {
305
  "epoch": 2.51,
306
+ "learning_rate": 0.0004956896551724138,
307
+ "loss": 0.3807,
308
  "step": 235
309
  },
310
  {
311
  "epoch": 2.56,
312
+ "learning_rate": 0.0004849137931034483,
313
+ "loss": 0.4219,
314
  "step": 240
315
  },
316
  {
317
  "epoch": 2.61,
318
+ "learning_rate": 0.00047413793103448276,
319
+ "loss": 0.1977,
320
  "step": 245
321
  },
322
  {
323
  "epoch": 2.67,
324
+ "learning_rate": 0.00046336206896551726,
325
+ "loss": 0.162,
326
  "step": 250
327
  },
328
  {
329
  "epoch": 2.72,
330
+ "learning_rate": 0.0004525862068965517,
331
+ "loss": 0.2464,
332
  "step": 255
333
  },
334
  {
335
  "epoch": 2.77,
336
+ "learning_rate": 0.0004418103448275862,
337
+ "loss": 0.554,
338
  "step": 260
339
  },
340
  {
341
  "epoch": 2.83,
342
+ "learning_rate": 0.0004310344827586207,
343
+ "loss": 0.4732,
344
  "step": 265
345
  },
346
  {
347
  "epoch": 2.88,
348
+ "learning_rate": 0.0004202586206896552,
349
+ "loss": 0.3694,
350
  "step": 270
351
  },
352
  {
353
  "epoch": 2.93,
354
+ "learning_rate": 0.00040948275862068967,
355
+ "loss": 0.2819,
356
  "step": 275
357
  },
358
  {
359
  "epoch": 2.99,
360
+ "learning_rate": 0.00039870689655172416,
361
+ "loss": 0.2217,
362
  "step": 280
363
  },
364
  {
365
  "epoch": 3.0,
366
+ "eval_accuracy": 0.9284606866002215,
367
+ "eval_loss": 0.190183624625206,
368
+ "eval_runtime": 193.8722,
369
+ "eval_samples_per_second": 23.289,
370
+ "eval_steps_per_second": 1.46,
371
  "step": 281
372
  },
373
  {
374
  "epoch": 3.04,
375
+ "learning_rate": 0.0003879310344827586,
376
+ "loss": 0.192,
377
  "step": 285
378
  },
379
  {
380
  "epoch": 3.09,
381
+ "learning_rate": 0.0003771551724137931,
382
+ "loss": 0.2674,
383
  "step": 290
384
  },
385
  {
386
  "epoch": 3.15,
387
+ "learning_rate": 0.0003663793103448276,
388
+ "loss": 0.2207,
389
  "step": 295
390
  },
391
  {
392
  "epoch": 3.2,
393
+ "learning_rate": 0.00035560344827586203,
394
+ "loss": 0.2125,
395
  "step": 300
396
  },
397
  {
398
  "epoch": 3.25,
399
+ "learning_rate": 0.0003448275862068966,
400
+ "loss": 0.2431,
401
  "step": 305
402
  },
403
  {
404
  "epoch": 3.31,
405
+ "learning_rate": 0.0003340517241379311,
406
+ "loss": 0.1755,
407
  "step": 310
408
  },
409
  {
410
  "epoch": 3.36,
411
+ "learning_rate": 0.0003232758620689655,
412
+ "loss": 0.1636,
413
  "step": 315
414
  },
415
  {
416
  "epoch": 3.41,
417
+ "learning_rate": 0.0003125,
418
+ "loss": 0.1787,
419
  "step": 320
420
  },
421
  {
422
  "epoch": 3.47,
423
+ "learning_rate": 0.0003017241379310345,
424
+ "loss": 0.2748,
425
  "step": 325
426
  },
427
  {
428
  "epoch": 3.52,
429
+ "learning_rate": 0.00029094827586206894,
430
+ "loss": 0.315,
431
  "step": 330
432
  },
433
  {
434
  "epoch": 3.57,
435
+ "learning_rate": 0.00028017241379310343,
436
+ "loss": 0.1869,
437
  "step": 335
438
  },
439
  {
440
  "epoch": 3.63,
441
+ "learning_rate": 0.000269396551724138,
442
+ "loss": 0.1631,
443
  "step": 340
444
  },
445
  {
446
  "epoch": 3.68,
447
+ "learning_rate": 0.0002586206896551724,
448
+ "loss": 0.1704,
449
  "step": 345
450
  },
451
  {
452
  "epoch": 3.73,
453
+ "learning_rate": 0.0002478448275862069,
454
+ "loss": 0.1518,
455
  "step": 350
456
  },
457
  {
458
  "epoch": 3.79,
459
+ "learning_rate": 0.00023706896551724138,
460
+ "loss": 0.0793,
461
  "step": 355
462
  },
463
  {
464
  "epoch": 3.84,
465
+ "learning_rate": 0.00022629310344827585,
466
+ "loss": 0.1535,
467
  "step": 360
468
  },
469
  {
470
  "epoch": 3.89,
471
+ "learning_rate": 0.00021551724137931034,
472
+ "loss": 0.1496,
473
  "step": 365
474
  },
475
  {
476
  "epoch": 3.95,
477
+ "learning_rate": 0.00020474137931034484,
478
+ "loss": 0.2077,
479
  "step": 370
480
  },
481
  {
482
  "epoch": 4.0,
483
+ "learning_rate": 0.0001939655172413793,
484
+ "loss": 0.1667,
485
  "step": 375
486
  },
487
  {
488
  "epoch": 4.0,
489
+ "eval_accuracy": 0.9583610188261351,
490
+ "eval_loss": 0.11267491430044174,
491
+ "eval_runtime": 193.9805,
492
+ "eval_samples_per_second": 23.276,
493
+ "eval_steps_per_second": 1.459,
494
  "step": 375
495
  },
496
  {
497
  "epoch": 4.05,
498
+ "learning_rate": 0.0001831896551724138,
499
+ "loss": 0.1267,
500
  "step": 380
501
  },
502
  {
503
  "epoch": 4.11,
504
+ "learning_rate": 0.0001724137931034483,
505
+ "loss": 0.1083,
506
  "step": 385
507
  },
508
  {
509
  "epoch": 4.16,
510
+ "learning_rate": 0.00016163793103448276,
511
+ "loss": 0.1441,
512
  "step": 390
513
  },
514
  {
515
  "epoch": 4.21,
516
+ "learning_rate": 0.00015086206896551725,
517
+ "loss": 0.1187,
518
  "step": 395
519
  },
520
  {
521
  "epoch": 4.27,
522
+ "learning_rate": 0.00014008620689655172,
523
+ "loss": 0.1174,
524
  "step": 400
525
  },
526
  {
527
  "epoch": 4.32,
528
+ "learning_rate": 0.0001293103448275862,
529
+ "loss": 0.0663,
530
  "step": 405
531
  },
532
  {
533
  "epoch": 4.37,
534
+ "learning_rate": 0.00011853448275862069,
535
+ "loss": 0.075,
536
  "step": 410
537
  },
538
  {
539
  "epoch": 4.43,
540
+ "learning_rate": 0.00010775862068965517,
541
+ "loss": 0.1271,
542
  "step": 415
543
  },
544
  {
545
  "epoch": 4.48,
546
+ "learning_rate": 9.698275862068965e-05,
547
+ "loss": 0.1644,
548
  "step": 420
549
  },
550
  {
551
  "epoch": 4.53,
552
+ "learning_rate": 8.620689655172414e-05,
553
+ "loss": 0.1401,
554
  "step": 425
555
  },
556
  {
557
  "epoch": 4.59,
558
+ "learning_rate": 7.543103448275863e-05,
559
+ "loss": 0.12,
560
  "step": 430
561
  },
562
  {
563
  "epoch": 4.64,
564
+ "learning_rate": 6.46551724137931e-05,
565
+ "loss": 0.1086,
566
  "step": 435
567
  },
568
  {
569
  "epoch": 4.69,
570
+ "learning_rate": 5.3879310344827585e-05,
571
+ "loss": 0.1067,
572
  "step": 440
573
  },
574
  {
575
  "epoch": 4.75,
576
+ "learning_rate": 4.310344827586207e-05,
577
+ "loss": 0.0789,
578
  "step": 445
579
  },
580
  {
581
  "epoch": 4.8,
582
+ "learning_rate": 3.232758620689655e-05,
583
+ "loss": 0.0696,
584
  "step": 450
585
  },
586
  {
587
  "epoch": 4.85,
588
+ "learning_rate": 2.1551724137931036e-05,
589
+ "loss": 0.0584,
590
  "step": 455
591
  },
592
  {
593
  "epoch": 4.91,
594
+ "learning_rate": 1.0775862068965518e-05,
595
+ "loss": 0.1217,
596
  "step": 460
597
  },
598
  {
599
  "epoch": 4.96,
600
  "learning_rate": 0.0,
601
+ "loss": 0.0572,
602
  "step": 465
603
  },
604
  {
605
  "epoch": 4.96,
606
+ "eval_accuracy": 0.9805094130675526,
607
+ "eval_loss": 0.04738219827413559,
608
+ "eval_runtime": 193.9079,
609
+ "eval_samples_per_second": 23.284,
610
+ "eval_steps_per_second": 1.459,
611
  "step": 465
612
  },
613
  {
614
  "epoch": 4.96,
615
  "step": 465,
616
  "total_flos": 8.138660625246413e+18,
617
+ "train_loss": 0.23329446437538312,
618
+ "train_runtime": 4933.7001,
619
+ "train_samples_per_second": 6.065,
620
+ "train_steps_per_second": 0.094
621
  }
622
  ],
623
  "max_steps": 465,