Abhiram4 commited on
Commit
dbd040e
1 Parent(s): 7fafb4a

End of training

Browse files
Files changed (4) hide show
  1. all_results.json +13 -0
  2. eval_results.json +8 -0
  3. train_results.json +8 -0
  4. trainer_state.json +1009 -0
all_results.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 6.98,
3
+ "eval_accuracy": 0.998719590268886,
4
+ "eval_loss": 0.06100322678685188,
5
+ "eval_runtime": 64.552,
6
+ "eval_samples_per_second": 108.889,
7
+ "eval_steps_per_second": 1.704,
8
+ "total_flos": 3.041208679733851e+19,
9
+ "train_loss": 0.46931519971240676,
10
+ "train_runtime": 11436.94,
11
+ "train_samples_per_second": 34.419,
12
+ "train_steps_per_second": 0.134
13
+ }
eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 6.98,
3
+ "eval_accuracy": 0.998719590268886,
4
+ "eval_loss": 0.06100322678685188,
5
+ "eval_runtime": 64.552,
6
+ "eval_samples_per_second": 108.889,
7
+ "eval_steps_per_second": 1.704
8
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 6.98,
3
+ "total_flos": 3.041208679733851e+19,
4
+ "train_loss": 0.46931519971240676,
5
+ "train_runtime": 11436.94,
6
+ "train_samples_per_second": 34.419,
7
+ "train_steps_per_second": 0.134
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,1009 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.998719590268886,
3
+ "best_model_checkpoint": "PlantDiseaseDetectorV2/checkpoint-1533",
4
+ "epoch": 6.976109215017065,
5
+ "eval_steps": 500,
6
+ "global_step": 1533,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.05,
13
+ "learning_rate": 3.2467532467532465e-06,
14
+ "loss": 3.6354,
15
+ "step": 10
16
+ },
17
+ {
18
+ "epoch": 0.09,
19
+ "learning_rate": 6.493506493506493e-06,
20
+ "loss": 3.6146,
21
+ "step": 20
22
+ },
23
+ {
24
+ "epoch": 0.14,
25
+ "learning_rate": 9.740259740259742e-06,
26
+ "loss": 3.5784,
27
+ "step": 30
28
+ },
29
+ {
30
+ "epoch": 0.18,
31
+ "learning_rate": 1.2987012987012986e-05,
32
+ "loss": 3.5183,
33
+ "step": 40
34
+ },
35
+ {
36
+ "epoch": 0.23,
37
+ "learning_rate": 1.6233766233766234e-05,
38
+ "loss": 3.435,
39
+ "step": 50
40
+ },
41
+ {
42
+ "epoch": 0.27,
43
+ "learning_rate": 1.9480519480519483e-05,
44
+ "loss": 3.3236,
45
+ "step": 60
46
+ },
47
+ {
48
+ "epoch": 0.32,
49
+ "learning_rate": 2.272727272727273e-05,
50
+ "loss": 3.1774,
51
+ "step": 70
52
+ },
53
+ {
54
+ "epoch": 0.36,
55
+ "learning_rate": 2.5974025974025972e-05,
56
+ "loss": 3.0042,
57
+ "step": 80
58
+ },
59
+ {
60
+ "epoch": 0.41,
61
+ "learning_rate": 2.922077922077922e-05,
62
+ "loss": 2.8095,
63
+ "step": 90
64
+ },
65
+ {
66
+ "epoch": 0.46,
67
+ "learning_rate": 3.246753246753247e-05,
68
+ "loss": 2.6034,
69
+ "step": 100
70
+ },
71
+ {
72
+ "epoch": 0.5,
73
+ "learning_rate": 3.571428571428572e-05,
74
+ "loss": 2.3817,
75
+ "step": 110
76
+ },
77
+ {
78
+ "epoch": 0.55,
79
+ "learning_rate": 3.8961038961038966e-05,
80
+ "loss": 2.1751,
81
+ "step": 120
82
+ },
83
+ {
84
+ "epoch": 0.59,
85
+ "learning_rate": 4.220779220779221e-05,
86
+ "loss": 1.9796,
87
+ "step": 130
88
+ },
89
+ {
90
+ "epoch": 0.64,
91
+ "learning_rate": 4.545454545454546e-05,
92
+ "loss": 1.7805,
93
+ "step": 140
94
+ },
95
+ {
96
+ "epoch": 0.68,
97
+ "learning_rate": 4.87012987012987e-05,
98
+ "loss": 1.6209,
99
+ "step": 150
100
+ },
101
+ {
102
+ "epoch": 0.73,
103
+ "learning_rate": 4.978245105148659e-05,
104
+ "loss": 1.4515,
105
+ "step": 160
106
+ },
107
+ {
108
+ "epoch": 0.77,
109
+ "learning_rate": 4.941986947063089e-05,
110
+ "loss": 1.3197,
111
+ "step": 170
112
+ },
113
+ {
114
+ "epoch": 0.82,
115
+ "learning_rate": 4.90572878897752e-05,
116
+ "loss": 1.1848,
117
+ "step": 180
118
+ },
119
+ {
120
+ "epoch": 0.86,
121
+ "learning_rate": 4.8694706308919505e-05,
122
+ "loss": 1.0976,
123
+ "step": 190
124
+ },
125
+ {
126
+ "epoch": 0.91,
127
+ "learning_rate": 4.833212472806382e-05,
128
+ "loss": 0.9857,
129
+ "step": 200
130
+ },
131
+ {
132
+ "epoch": 0.96,
133
+ "learning_rate": 4.7969543147208126e-05,
134
+ "loss": 0.9051,
135
+ "step": 210
136
+ },
137
+ {
138
+ "epoch": 1.0,
139
+ "eval_accuracy": 0.986057760705648,
140
+ "eval_loss": 0.8024668097496033,
141
+ "eval_runtime": 70.6659,
142
+ "eval_samples_per_second": 99.468,
143
+ "eval_steps_per_second": 1.557,
144
+ "step": 219
145
+ },
146
+ {
147
+ "epoch": 1.0,
148
+ "learning_rate": 4.760696156635243e-05,
149
+ "loss": 0.8431,
150
+ "step": 220
151
+ },
152
+ {
153
+ "epoch": 1.05,
154
+ "learning_rate": 4.724437998549674e-05,
155
+ "loss": 0.7784,
156
+ "step": 230
157
+ },
158
+ {
159
+ "epoch": 1.09,
160
+ "learning_rate": 4.6881798404641046e-05,
161
+ "loss": 0.7198,
162
+ "step": 240
163
+ },
164
+ {
165
+ "epoch": 1.14,
166
+ "learning_rate": 4.651921682378535e-05,
167
+ "loss": 0.6712,
168
+ "step": 250
169
+ },
170
+ {
171
+ "epoch": 1.18,
172
+ "learning_rate": 4.6156635242929666e-05,
173
+ "loss": 0.6365,
174
+ "step": 260
175
+ },
176
+ {
177
+ "epoch": 1.23,
178
+ "learning_rate": 4.579405366207397e-05,
179
+ "loss": 0.5892,
180
+ "step": 270
181
+ },
182
+ {
183
+ "epoch": 1.27,
184
+ "learning_rate": 4.543147208121827e-05,
185
+ "loss": 0.5563,
186
+ "step": 280
187
+ },
188
+ {
189
+ "epoch": 1.32,
190
+ "learning_rate": 4.506889050036258e-05,
191
+ "loss": 0.5254,
192
+ "step": 290
193
+ },
194
+ {
195
+ "epoch": 1.37,
196
+ "learning_rate": 4.4706308919506886e-05,
197
+ "loss": 0.4982,
198
+ "step": 300
199
+ },
200
+ {
201
+ "epoch": 1.41,
202
+ "learning_rate": 4.43437273386512e-05,
203
+ "loss": 0.4667,
204
+ "step": 310
205
+ },
206
+ {
207
+ "epoch": 1.46,
208
+ "learning_rate": 4.398114575779551e-05,
209
+ "loss": 0.4481,
210
+ "step": 320
211
+ },
212
+ {
213
+ "epoch": 1.5,
214
+ "learning_rate": 4.3618564176939813e-05,
215
+ "loss": 0.4199,
216
+ "step": 330
217
+ },
218
+ {
219
+ "epoch": 1.55,
220
+ "learning_rate": 4.325598259608412e-05,
221
+ "loss": 0.4117,
222
+ "step": 340
223
+ },
224
+ {
225
+ "epoch": 1.59,
226
+ "learning_rate": 4.289340101522843e-05,
227
+ "loss": 0.3867,
228
+ "step": 350
229
+ },
230
+ {
231
+ "epoch": 1.64,
232
+ "learning_rate": 4.253081943437274e-05,
233
+ "loss": 0.3736,
234
+ "step": 360
235
+ },
236
+ {
237
+ "epoch": 1.68,
238
+ "learning_rate": 4.216823785351705e-05,
239
+ "loss": 0.3668,
240
+ "step": 370
241
+ },
242
+ {
243
+ "epoch": 1.73,
244
+ "learning_rate": 4.1805656272661354e-05,
245
+ "loss": 0.3369,
246
+ "step": 380
247
+ },
248
+ {
249
+ "epoch": 1.77,
250
+ "learning_rate": 4.1443074691805654e-05,
251
+ "loss": 0.3416,
252
+ "step": 390
253
+ },
254
+ {
255
+ "epoch": 1.82,
256
+ "learning_rate": 4.108049311094996e-05,
257
+ "loss": 0.3287,
258
+ "step": 400
259
+ },
260
+ {
261
+ "epoch": 1.87,
262
+ "learning_rate": 4.0717911530094274e-05,
263
+ "loss": 0.2989,
264
+ "step": 410
265
+ },
266
+ {
267
+ "epoch": 1.91,
268
+ "learning_rate": 4.035532994923858e-05,
269
+ "loss": 0.2934,
270
+ "step": 420
271
+ },
272
+ {
273
+ "epoch": 1.96,
274
+ "learning_rate": 3.999274836838289e-05,
275
+ "loss": 0.2801,
276
+ "step": 430
277
+ },
278
+ {
279
+ "epoch": 2.0,
280
+ "eval_accuracy": 0.9958742353108551,
281
+ "eval_loss": 0.26061177253723145,
282
+ "eval_runtime": 71.3214,
283
+ "eval_samples_per_second": 98.554,
284
+ "eval_steps_per_second": 1.542,
285
+ "step": 439
286
+ },
287
+ {
288
+ "epoch": 2.0,
289
+ "learning_rate": 3.9630166787527195e-05,
290
+ "loss": 0.2742,
291
+ "step": 440
292
+ },
293
+ {
294
+ "epoch": 2.05,
295
+ "learning_rate": 3.92675852066715e-05,
296
+ "loss": 0.2615,
297
+ "step": 450
298
+ },
299
+ {
300
+ "epoch": 2.09,
301
+ "learning_rate": 3.890500362581581e-05,
302
+ "loss": 0.2512,
303
+ "step": 460
304
+ },
305
+ {
306
+ "epoch": 2.14,
307
+ "learning_rate": 3.854242204496012e-05,
308
+ "loss": 0.2442,
309
+ "step": 470
310
+ },
311
+ {
312
+ "epoch": 2.18,
313
+ "learning_rate": 3.817984046410443e-05,
314
+ "loss": 0.2369,
315
+ "step": 480
316
+ },
317
+ {
318
+ "epoch": 2.23,
319
+ "learning_rate": 3.7817258883248735e-05,
320
+ "loss": 0.2288,
321
+ "step": 490
322
+ },
323
+ {
324
+ "epoch": 2.28,
325
+ "learning_rate": 3.7454677302393035e-05,
326
+ "loss": 0.2226,
327
+ "step": 500
328
+ },
329
+ {
330
+ "epoch": 2.32,
331
+ "learning_rate": 3.709209572153735e-05,
332
+ "loss": 0.2162,
333
+ "step": 510
334
+ },
335
+ {
336
+ "epoch": 2.37,
337
+ "learning_rate": 3.6729514140681655e-05,
338
+ "loss": 0.2055,
339
+ "step": 520
340
+ },
341
+ {
342
+ "epoch": 2.41,
343
+ "learning_rate": 3.636693255982596e-05,
344
+ "loss": 0.199,
345
+ "step": 530
346
+ },
347
+ {
348
+ "epoch": 2.46,
349
+ "learning_rate": 3.600435097897027e-05,
350
+ "loss": 0.2128,
351
+ "step": 540
352
+ },
353
+ {
354
+ "epoch": 2.5,
355
+ "learning_rate": 3.5641769398114576e-05,
356
+ "loss": 0.1885,
357
+ "step": 550
358
+ },
359
+ {
360
+ "epoch": 2.55,
361
+ "learning_rate": 3.527918781725888e-05,
362
+ "loss": 0.1854,
363
+ "step": 560
364
+ },
365
+ {
366
+ "epoch": 2.59,
367
+ "learning_rate": 3.4916606236403196e-05,
368
+ "loss": 0.1845,
369
+ "step": 570
370
+ },
371
+ {
372
+ "epoch": 2.64,
373
+ "learning_rate": 3.45540246555475e-05,
374
+ "loss": 0.1814,
375
+ "step": 580
376
+ },
377
+ {
378
+ "epoch": 2.68,
379
+ "learning_rate": 3.419144307469181e-05,
380
+ "loss": 0.1816,
381
+ "step": 590
382
+ },
383
+ {
384
+ "epoch": 2.73,
385
+ "learning_rate": 3.3828861493836116e-05,
386
+ "loss": 0.1689,
387
+ "step": 600
388
+ },
389
+ {
390
+ "epoch": 2.78,
391
+ "learning_rate": 3.3466279912980416e-05,
392
+ "loss": 0.1658,
393
+ "step": 610
394
+ },
395
+ {
396
+ "epoch": 2.82,
397
+ "learning_rate": 3.310369833212473e-05,
398
+ "loss": 0.1612,
399
+ "step": 620
400
+ },
401
+ {
402
+ "epoch": 2.87,
403
+ "learning_rate": 3.2741116751269036e-05,
404
+ "loss": 0.1572,
405
+ "step": 630
406
+ },
407
+ {
408
+ "epoch": 2.91,
409
+ "learning_rate": 3.237853517041334e-05,
410
+ "loss": 0.1552,
411
+ "step": 640
412
+ },
413
+ {
414
+ "epoch": 2.96,
415
+ "learning_rate": 3.201595358955765e-05,
416
+ "loss": 0.1455,
417
+ "step": 650
418
+ },
419
+ {
420
+ "epoch": 3.0,
421
+ "eval_accuracy": 0.9972969127898705,
422
+ "eval_loss": 0.14022748172283173,
423
+ "eval_runtime": 71.3997,
424
+ "eval_samples_per_second": 98.446,
425
+ "eval_steps_per_second": 1.541,
426
+ "step": 659
427
+ },
428
+ {
429
+ "epoch": 3.0,
430
+ "learning_rate": 3.165337200870196e-05,
431
+ "loss": 0.1446,
432
+ "step": 660
433
+ },
434
+ {
435
+ "epoch": 3.05,
436
+ "learning_rate": 3.129079042784627e-05,
437
+ "loss": 0.1422,
438
+ "step": 670
439
+ },
440
+ {
441
+ "epoch": 3.09,
442
+ "learning_rate": 3.092820884699058e-05,
443
+ "loss": 0.1411,
444
+ "step": 680
445
+ },
446
+ {
447
+ "epoch": 3.14,
448
+ "learning_rate": 3.0565627266134884e-05,
449
+ "loss": 0.1334,
450
+ "step": 690
451
+ },
452
+ {
453
+ "epoch": 3.19,
454
+ "learning_rate": 3.020304568527919e-05,
455
+ "loss": 0.1333,
456
+ "step": 700
457
+ },
458
+ {
459
+ "epoch": 3.23,
460
+ "learning_rate": 2.98404641044235e-05,
461
+ "loss": 0.1325,
462
+ "step": 710
463
+ },
464
+ {
465
+ "epoch": 3.28,
466
+ "learning_rate": 2.9477882523567807e-05,
467
+ "loss": 0.1278,
468
+ "step": 720
469
+ },
470
+ {
471
+ "epoch": 3.32,
472
+ "learning_rate": 2.911530094271211e-05,
473
+ "loss": 0.127,
474
+ "step": 730
475
+ },
476
+ {
477
+ "epoch": 3.37,
478
+ "learning_rate": 2.8752719361856417e-05,
479
+ "loss": 0.1272,
480
+ "step": 740
481
+ },
482
+ {
483
+ "epoch": 3.41,
484
+ "learning_rate": 2.8390137781000724e-05,
485
+ "loss": 0.1289,
486
+ "step": 750
487
+ },
488
+ {
489
+ "epoch": 3.46,
490
+ "learning_rate": 2.8027556200145034e-05,
491
+ "loss": 0.1235,
492
+ "step": 760
493
+ },
494
+ {
495
+ "epoch": 3.5,
496
+ "learning_rate": 2.766497461928934e-05,
497
+ "loss": 0.1194,
498
+ "step": 770
499
+ },
500
+ {
501
+ "epoch": 3.55,
502
+ "learning_rate": 2.7302393038433648e-05,
503
+ "loss": 0.1179,
504
+ "step": 780
505
+ },
506
+ {
507
+ "epoch": 3.59,
508
+ "learning_rate": 2.6939811457577958e-05,
509
+ "loss": 0.11,
510
+ "step": 790
511
+ },
512
+ {
513
+ "epoch": 3.64,
514
+ "learning_rate": 2.6577229876722265e-05,
515
+ "loss": 0.1126,
516
+ "step": 800
517
+ },
518
+ {
519
+ "epoch": 3.69,
520
+ "learning_rate": 2.621464829586657e-05,
521
+ "loss": 0.1106,
522
+ "step": 810
523
+ },
524
+ {
525
+ "epoch": 3.73,
526
+ "learning_rate": 2.585206671501088e-05,
527
+ "loss": 0.1073,
528
+ "step": 820
529
+ },
530
+ {
531
+ "epoch": 3.78,
532
+ "learning_rate": 2.548948513415519e-05,
533
+ "loss": 0.1024,
534
+ "step": 830
535
+ },
536
+ {
537
+ "epoch": 3.82,
538
+ "learning_rate": 2.5126903553299492e-05,
539
+ "loss": 0.1001,
540
+ "step": 840
541
+ },
542
+ {
543
+ "epoch": 3.87,
544
+ "learning_rate": 2.4764321972443802e-05,
545
+ "loss": 0.1013,
546
+ "step": 850
547
+ },
548
+ {
549
+ "epoch": 3.91,
550
+ "learning_rate": 2.440174039158811e-05,
551
+ "loss": 0.1003,
552
+ "step": 860
553
+ },
554
+ {
555
+ "epoch": 3.96,
556
+ "learning_rate": 2.4039158810732415e-05,
557
+ "loss": 0.0949,
558
+ "step": 870
559
+ },
560
+ {
561
+ "epoch": 4.0,
562
+ "eval_accuracy": 0.9985773225209845,
563
+ "eval_loss": 0.09417087584733963,
564
+ "eval_runtime": 71.2514,
565
+ "eval_samples_per_second": 98.651,
566
+ "eval_steps_per_second": 1.544,
567
+ "step": 879
568
+ },
569
+ {
570
+ "epoch": 4.0,
571
+ "learning_rate": 2.3676577229876722e-05,
572
+ "loss": 0.0965,
573
+ "step": 880
574
+ },
575
+ {
576
+ "epoch": 4.05,
577
+ "learning_rate": 2.3313995649021032e-05,
578
+ "loss": 0.0936,
579
+ "step": 890
580
+ },
581
+ {
582
+ "epoch": 4.1,
583
+ "learning_rate": 2.295141406816534e-05,
584
+ "loss": 0.0938,
585
+ "step": 900
586
+ },
587
+ {
588
+ "epoch": 4.14,
589
+ "learning_rate": 2.2588832487309646e-05,
590
+ "loss": 0.0907,
591
+ "step": 910
592
+ },
593
+ {
594
+ "epoch": 4.19,
595
+ "learning_rate": 2.2226250906453953e-05,
596
+ "loss": 0.0889,
597
+ "step": 920
598
+ },
599
+ {
600
+ "epoch": 4.23,
601
+ "learning_rate": 2.186366932559826e-05,
602
+ "loss": 0.0889,
603
+ "step": 930
604
+ },
605
+ {
606
+ "epoch": 4.28,
607
+ "learning_rate": 2.150108774474257e-05,
608
+ "loss": 0.0905,
609
+ "step": 940
610
+ },
611
+ {
612
+ "epoch": 4.32,
613
+ "learning_rate": 2.1138506163886876e-05,
614
+ "loss": 0.0888,
615
+ "step": 950
616
+ },
617
+ {
618
+ "epoch": 4.37,
619
+ "learning_rate": 2.0775924583031183e-05,
620
+ "loss": 0.0849,
621
+ "step": 960
622
+ },
623
+ {
624
+ "epoch": 4.41,
625
+ "learning_rate": 2.0413343002175493e-05,
626
+ "loss": 0.0852,
627
+ "step": 970
628
+ },
629
+ {
630
+ "epoch": 4.46,
631
+ "learning_rate": 2.0050761421319797e-05,
632
+ "loss": 0.0843,
633
+ "step": 980
634
+ },
635
+ {
636
+ "epoch": 4.51,
637
+ "learning_rate": 1.9688179840464103e-05,
638
+ "loss": 0.0851,
639
+ "step": 990
640
+ },
641
+ {
642
+ "epoch": 4.55,
643
+ "learning_rate": 1.9325598259608413e-05,
644
+ "loss": 0.0884,
645
+ "step": 1000
646
+ },
647
+ {
648
+ "epoch": 4.6,
649
+ "learning_rate": 1.896301667875272e-05,
650
+ "loss": 0.0899,
651
+ "step": 1010
652
+ },
653
+ {
654
+ "epoch": 4.64,
655
+ "learning_rate": 1.860043509789703e-05,
656
+ "loss": 0.0848,
657
+ "step": 1020
658
+ },
659
+ {
660
+ "epoch": 4.69,
661
+ "learning_rate": 1.8237853517041334e-05,
662
+ "loss": 0.0794,
663
+ "step": 1030
664
+ },
665
+ {
666
+ "epoch": 4.73,
667
+ "learning_rate": 1.787527193618564e-05,
668
+ "loss": 0.0802,
669
+ "step": 1040
670
+ },
671
+ {
672
+ "epoch": 4.78,
673
+ "learning_rate": 1.751269035532995e-05,
674
+ "loss": 0.0808,
675
+ "step": 1050
676
+ },
677
+ {
678
+ "epoch": 4.82,
679
+ "learning_rate": 1.7150108774474257e-05,
680
+ "loss": 0.0763,
681
+ "step": 1060
682
+ },
683
+ {
684
+ "epoch": 4.87,
685
+ "learning_rate": 1.6787527193618564e-05,
686
+ "loss": 0.0801,
687
+ "step": 1070
688
+ },
689
+ {
690
+ "epoch": 4.91,
691
+ "learning_rate": 1.6424945612762874e-05,
692
+ "loss": 0.0745,
693
+ "step": 1080
694
+ },
695
+ {
696
+ "epoch": 4.96,
697
+ "learning_rate": 1.6062364031907178e-05,
698
+ "loss": 0.0741,
699
+ "step": 1090
700
+ },
701
+ {
702
+ "epoch": 5.0,
703
+ "eval_accuracy": 0.9984350547730829,
704
+ "eval_loss": 0.07485666126012802,
705
+ "eval_runtime": 71.7237,
706
+ "eval_samples_per_second": 98.001,
707
+ "eval_steps_per_second": 1.534,
708
+ "step": 1098
709
+ },
710
+ {
711
+ "epoch": 5.01,
712
+ "learning_rate": 1.5699782451051488e-05,
713
+ "loss": 0.0738,
714
+ "step": 1100
715
+ },
716
+ {
717
+ "epoch": 5.05,
718
+ "learning_rate": 1.5337200870195794e-05,
719
+ "loss": 0.0729,
720
+ "step": 1110
721
+ },
722
+ {
723
+ "epoch": 5.1,
724
+ "learning_rate": 1.4974619289340103e-05,
725
+ "loss": 0.0748,
726
+ "step": 1120
727
+ },
728
+ {
729
+ "epoch": 5.14,
730
+ "learning_rate": 1.4612037708484411e-05,
731
+ "loss": 0.0698,
732
+ "step": 1130
733
+ },
734
+ {
735
+ "epoch": 5.19,
736
+ "learning_rate": 1.4249456127628716e-05,
737
+ "loss": 0.0721,
738
+ "step": 1140
739
+ },
740
+ {
741
+ "epoch": 5.23,
742
+ "learning_rate": 1.3886874546773023e-05,
743
+ "loss": 0.0748,
744
+ "step": 1150
745
+ },
746
+ {
747
+ "epoch": 5.28,
748
+ "learning_rate": 1.3524292965917332e-05,
749
+ "loss": 0.0695,
750
+ "step": 1160
751
+ },
752
+ {
753
+ "epoch": 5.32,
754
+ "learning_rate": 1.316171138506164e-05,
755
+ "loss": 0.0688,
756
+ "step": 1170
757
+ },
758
+ {
759
+ "epoch": 5.37,
760
+ "learning_rate": 1.2799129804205947e-05,
761
+ "loss": 0.0723,
762
+ "step": 1180
763
+ },
764
+ {
765
+ "epoch": 5.42,
766
+ "learning_rate": 1.2436548223350254e-05,
767
+ "loss": 0.0683,
768
+ "step": 1190
769
+ },
770
+ {
771
+ "epoch": 5.46,
772
+ "learning_rate": 1.2073966642494562e-05,
773
+ "loss": 0.0709,
774
+ "step": 1200
775
+ },
776
+ {
777
+ "epoch": 5.51,
778
+ "learning_rate": 1.1711385061638869e-05,
779
+ "loss": 0.0674,
780
+ "step": 1210
781
+ },
782
+ {
783
+ "epoch": 5.55,
784
+ "learning_rate": 1.1348803480783177e-05,
785
+ "loss": 0.0685,
786
+ "step": 1220
787
+ },
788
+ {
789
+ "epoch": 5.6,
790
+ "learning_rate": 1.0986221899927484e-05,
791
+ "loss": 0.0679,
792
+ "step": 1230
793
+ },
794
+ {
795
+ "epoch": 5.64,
796
+ "learning_rate": 1.062364031907179e-05,
797
+ "loss": 0.0672,
798
+ "step": 1240
799
+ },
800
+ {
801
+ "epoch": 5.69,
802
+ "learning_rate": 1.02610587382161e-05,
803
+ "loss": 0.0648,
804
+ "step": 1250
805
+ },
806
+ {
807
+ "epoch": 5.73,
808
+ "learning_rate": 9.898477157360408e-06,
809
+ "loss": 0.0659,
810
+ "step": 1260
811
+ },
812
+ {
813
+ "epoch": 5.78,
814
+ "learning_rate": 9.535895576504714e-06,
815
+ "loss": 0.0629,
816
+ "step": 1270
817
+ },
818
+ {
819
+ "epoch": 5.82,
820
+ "learning_rate": 9.173313995649021e-06,
821
+ "loss": 0.0627,
822
+ "step": 1280
823
+ },
824
+ {
825
+ "epoch": 5.87,
826
+ "learning_rate": 8.81073241479333e-06,
827
+ "loss": 0.0643,
828
+ "step": 1290
829
+ },
830
+ {
831
+ "epoch": 5.92,
832
+ "learning_rate": 8.448150833937636e-06,
833
+ "loss": 0.065,
834
+ "step": 1300
835
+ },
836
+ {
837
+ "epoch": 5.96,
838
+ "learning_rate": 8.085569253081945e-06,
839
+ "loss": 0.0623,
840
+ "step": 1310
841
+ },
842
+ {
843
+ "epoch": 6.0,
844
+ "eval_accuracy": 0.9984350547730829,
845
+ "eval_loss": 0.06421820819377899,
846
+ "eval_runtime": 71.8803,
847
+ "eval_samples_per_second": 97.788,
848
+ "eval_steps_per_second": 1.53,
849
+ "step": 1318
850
+ },
851
+ {
852
+ "epoch": 6.01,
853
+ "learning_rate": 7.72298767222625e-06,
854
+ "loss": 0.0634,
855
+ "step": 1320
856
+ },
857
+ {
858
+ "epoch": 6.05,
859
+ "learning_rate": 7.360406091370558e-06,
860
+ "loss": 0.0625,
861
+ "step": 1330
862
+ },
863
+ {
864
+ "epoch": 6.1,
865
+ "learning_rate": 6.997824510514867e-06,
866
+ "loss": 0.0612,
867
+ "step": 1340
868
+ },
869
+ {
870
+ "epoch": 6.14,
871
+ "learning_rate": 6.635242929659173e-06,
872
+ "loss": 0.0607,
873
+ "step": 1350
874
+ },
875
+ {
876
+ "epoch": 6.19,
877
+ "learning_rate": 6.272661348803481e-06,
878
+ "loss": 0.0595,
879
+ "step": 1360
880
+ },
881
+ {
882
+ "epoch": 6.23,
883
+ "learning_rate": 5.910079767947789e-06,
884
+ "loss": 0.0602,
885
+ "step": 1370
886
+ },
887
+ {
888
+ "epoch": 6.28,
889
+ "learning_rate": 5.547498187092096e-06,
890
+ "loss": 0.0606,
891
+ "step": 1380
892
+ },
893
+ {
894
+ "epoch": 6.33,
895
+ "learning_rate": 5.184916606236404e-06,
896
+ "loss": 0.0608,
897
+ "step": 1390
898
+ },
899
+ {
900
+ "epoch": 6.37,
901
+ "learning_rate": 4.822335025380711e-06,
902
+ "loss": 0.0621,
903
+ "step": 1400
904
+ },
905
+ {
906
+ "epoch": 6.42,
907
+ "learning_rate": 4.459753444525018e-06,
908
+ "loss": 0.0608,
909
+ "step": 1410
910
+ },
911
+ {
912
+ "epoch": 6.46,
913
+ "learning_rate": 4.097171863669326e-06,
914
+ "loss": 0.0615,
915
+ "step": 1420
916
+ },
917
+ {
918
+ "epoch": 6.51,
919
+ "learning_rate": 3.734590282813633e-06,
920
+ "loss": 0.0604,
921
+ "step": 1430
922
+ },
923
+ {
924
+ "epoch": 6.55,
925
+ "learning_rate": 3.3720087019579407e-06,
926
+ "loss": 0.0612,
927
+ "step": 1440
928
+ },
929
+ {
930
+ "epoch": 6.6,
931
+ "learning_rate": 3.0094271211022483e-06,
932
+ "loss": 0.058,
933
+ "step": 1450
934
+ },
935
+ {
936
+ "epoch": 6.64,
937
+ "learning_rate": 2.646845540246556e-06,
938
+ "loss": 0.0598,
939
+ "step": 1460
940
+ },
941
+ {
942
+ "epoch": 6.69,
943
+ "learning_rate": 2.284263959390863e-06,
944
+ "loss": 0.0592,
945
+ "step": 1470
946
+ },
947
+ {
948
+ "epoch": 6.73,
949
+ "learning_rate": 1.9216823785351706e-06,
950
+ "loss": 0.058,
951
+ "step": 1480
952
+ },
953
+ {
954
+ "epoch": 6.78,
955
+ "learning_rate": 1.559100797679478e-06,
956
+ "loss": 0.0573,
957
+ "step": 1490
958
+ },
959
+ {
960
+ "epoch": 6.83,
961
+ "learning_rate": 1.1965192168237854e-06,
962
+ "loss": 0.058,
963
+ "step": 1500
964
+ },
965
+ {
966
+ "epoch": 6.87,
967
+ "learning_rate": 8.339376359680928e-07,
968
+ "loss": 0.0603,
969
+ "step": 1510
970
+ },
971
+ {
972
+ "epoch": 6.92,
973
+ "learning_rate": 4.713560551124003e-07,
974
+ "loss": 0.0576,
975
+ "step": 1520
976
+ },
977
+ {
978
+ "epoch": 6.96,
979
+ "learning_rate": 1.0877447425670776e-07,
980
+ "loss": 0.0586,
981
+ "step": 1530
982
+ },
983
+ {
984
+ "epoch": 6.98,
985
+ "eval_accuracy": 0.998719590268886,
986
+ "eval_loss": 0.06100322678685188,
987
+ "eval_runtime": 70.9668,
988
+ "eval_samples_per_second": 99.046,
989
+ "eval_steps_per_second": 1.55,
990
+ "step": 1533
991
+ },
992
+ {
993
+ "epoch": 6.98,
994
+ "step": 1533,
995
+ "total_flos": 3.041208679733851e+19,
996
+ "train_loss": 0.46931519971240676,
997
+ "train_runtime": 11436.94,
998
+ "train_samples_per_second": 34.419,
999
+ "train_steps_per_second": 0.134
1000
+ }
1001
+ ],
1002
+ "logging_steps": 10,
1003
+ "max_steps": 1533,
1004
+ "num_train_epochs": 7,
1005
+ "save_steps": 500,
1006
+ "total_flos": 3.041208679733851e+19,
1007
+ "trial_name": null,
1008
+ "trial_params": null
1009
+ }