julianz1 commited on
Commit
c02540f
1 Parent(s): 2fd2989

End of training

Browse files
all_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "total_flos": 4.812819303609225e+18,
4
+ "train_loss": 1.6366877293570932,
5
+ "train_runtime": 3640.8494,
6
+ "train_samples_per_second": 53.054,
7
+ "train_steps_per_second": 0.414
8
+ }
runs/Jan11_09-42-41_e308505a2991/events.out.tfevents.1704972642.e308505a2991.24483.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:270421cd5d34ea94cbeace003168510107298aab65849e14656855c2eeca67f0
3
+ size 411
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "total_flos": 4.812819303609225e+18,
4
+ "train_loss": 1.6366877293570932,
5
+ "train_runtime": 3640.8494,
6
+ "train_samples_per_second": 53.054,
7
+ "train_steps_per_second": 0.414
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,964 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.7760274575376221,
3
+ "best_model_checkpoint": "swin-tiny-patch4-window7-224-finetuned-food101/checkpoint-1509",
4
+ "epoch": 2.9985096870342773,
5
+ "eval_steps": 500,
6
+ "global_step": 1509,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.02,
13
+ "learning_rate": 3.3112582781456956e-06,
14
+ "loss": 4.6663,
15
+ "step": 10
16
+ },
17
+ {
18
+ "epoch": 0.04,
19
+ "learning_rate": 6.622516556291391e-06,
20
+ "loss": 4.6531,
21
+ "step": 20
22
+ },
23
+ {
24
+ "epoch": 0.06,
25
+ "learning_rate": 9.933774834437086e-06,
26
+ "loss": 4.628,
27
+ "step": 30
28
+ },
29
+ {
30
+ "epoch": 0.08,
31
+ "learning_rate": 1.3245033112582782e-05,
32
+ "loss": 4.613,
33
+ "step": 40
34
+ },
35
+ {
36
+ "epoch": 0.1,
37
+ "learning_rate": 1.655629139072848e-05,
38
+ "loss": 4.5875,
39
+ "step": 50
40
+ },
41
+ {
42
+ "epoch": 0.12,
43
+ "learning_rate": 1.9867549668874173e-05,
44
+ "loss": 4.5399,
45
+ "step": 60
46
+ },
47
+ {
48
+ "epoch": 0.14,
49
+ "learning_rate": 2.317880794701987e-05,
50
+ "loss": 4.5013,
51
+ "step": 70
52
+ },
53
+ {
54
+ "epoch": 0.16,
55
+ "learning_rate": 2.6490066225165565e-05,
56
+ "loss": 4.42,
57
+ "step": 80
58
+ },
59
+ {
60
+ "epoch": 0.18,
61
+ "learning_rate": 2.980132450331126e-05,
62
+ "loss": 4.298,
63
+ "step": 90
64
+ },
65
+ {
66
+ "epoch": 0.2,
67
+ "learning_rate": 3.311258278145696e-05,
68
+ "loss": 4.1074,
69
+ "step": 100
70
+ },
71
+ {
72
+ "epoch": 0.22,
73
+ "learning_rate": 3.6423841059602646e-05,
74
+ "loss": 3.9249,
75
+ "step": 110
76
+ },
77
+ {
78
+ "epoch": 0.24,
79
+ "learning_rate": 3.9735099337748346e-05,
80
+ "loss": 3.6311,
81
+ "step": 120
82
+ },
83
+ {
84
+ "epoch": 0.26,
85
+ "learning_rate": 4.304635761589404e-05,
86
+ "loss": 3.4282,
87
+ "step": 130
88
+ },
89
+ {
90
+ "epoch": 0.28,
91
+ "learning_rate": 4.635761589403974e-05,
92
+ "loss": 3.1668,
93
+ "step": 140
94
+ },
95
+ {
96
+ "epoch": 0.3,
97
+ "learning_rate": 4.966887417218543e-05,
98
+ "loss": 3.0268,
99
+ "step": 150
100
+ },
101
+ {
102
+ "epoch": 0.32,
103
+ "learning_rate": 4.966863033873343e-05,
104
+ "loss": 2.7985,
105
+ "step": 160
106
+ },
107
+ {
108
+ "epoch": 0.34,
109
+ "learning_rate": 4.9300441826215024e-05,
110
+ "loss": 2.6903,
111
+ "step": 170
112
+ },
113
+ {
114
+ "epoch": 0.36,
115
+ "learning_rate": 4.8932253313696616e-05,
116
+ "loss": 2.4989,
117
+ "step": 180
118
+ },
119
+ {
120
+ "epoch": 0.38,
121
+ "learning_rate": 4.856406480117821e-05,
122
+ "loss": 2.5033,
123
+ "step": 190
124
+ },
125
+ {
126
+ "epoch": 0.4,
127
+ "learning_rate": 4.8195876288659794e-05,
128
+ "loss": 2.3169,
129
+ "step": 200
130
+ },
131
+ {
132
+ "epoch": 0.42,
133
+ "learning_rate": 4.7827687776141386e-05,
134
+ "loss": 2.3053,
135
+ "step": 210
136
+ },
137
+ {
138
+ "epoch": 0.44,
139
+ "learning_rate": 4.745949926362298e-05,
140
+ "loss": 2.1974,
141
+ "step": 220
142
+ },
143
+ {
144
+ "epoch": 0.46,
145
+ "learning_rate": 4.709131075110457e-05,
146
+ "loss": 2.0775,
147
+ "step": 230
148
+ },
149
+ {
150
+ "epoch": 0.48,
151
+ "learning_rate": 4.6723122238586156e-05,
152
+ "loss": 2.0065,
153
+ "step": 240
154
+ },
155
+ {
156
+ "epoch": 0.5,
157
+ "learning_rate": 4.635493372606775e-05,
158
+ "loss": 2.0521,
159
+ "step": 250
160
+ },
161
+ {
162
+ "epoch": 0.52,
163
+ "learning_rate": 4.598674521354934e-05,
164
+ "loss": 1.9464,
165
+ "step": 260
166
+ },
167
+ {
168
+ "epoch": 0.54,
169
+ "learning_rate": 4.561855670103093e-05,
170
+ "loss": 2.0498,
171
+ "step": 270
172
+ },
173
+ {
174
+ "epoch": 0.56,
175
+ "learning_rate": 4.525036818851252e-05,
176
+ "loss": 1.9544,
177
+ "step": 280
178
+ },
179
+ {
180
+ "epoch": 0.58,
181
+ "learning_rate": 4.488217967599411e-05,
182
+ "loss": 1.902,
183
+ "step": 290
184
+ },
185
+ {
186
+ "epoch": 0.6,
187
+ "learning_rate": 4.45139911634757e-05,
188
+ "loss": 1.8034,
189
+ "step": 300
190
+ },
191
+ {
192
+ "epoch": 0.62,
193
+ "learning_rate": 4.4145802650957294e-05,
194
+ "loss": 1.7483,
195
+ "step": 310
196
+ },
197
+ {
198
+ "epoch": 0.64,
199
+ "learning_rate": 4.3777614138438886e-05,
200
+ "loss": 1.8315,
201
+ "step": 320
202
+ },
203
+ {
204
+ "epoch": 0.66,
205
+ "learning_rate": 4.340942562592047e-05,
206
+ "loss": 1.7785,
207
+ "step": 330
208
+ },
209
+ {
210
+ "epoch": 0.68,
211
+ "learning_rate": 4.3041237113402064e-05,
212
+ "loss": 1.85,
213
+ "step": 340
214
+ },
215
+ {
216
+ "epoch": 0.7,
217
+ "learning_rate": 4.2673048600883656e-05,
218
+ "loss": 1.7037,
219
+ "step": 350
220
+ },
221
+ {
222
+ "epoch": 0.72,
223
+ "learning_rate": 4.230486008836525e-05,
224
+ "loss": 1.6452,
225
+ "step": 360
226
+ },
227
+ {
228
+ "epoch": 0.74,
229
+ "learning_rate": 4.193667157584683e-05,
230
+ "loss": 1.6515,
231
+ "step": 370
232
+ },
233
+ {
234
+ "epoch": 0.76,
235
+ "learning_rate": 4.1568483063328425e-05,
236
+ "loss": 1.6375,
237
+ "step": 380
238
+ },
239
+ {
240
+ "epoch": 0.77,
241
+ "learning_rate": 4.120029455081002e-05,
242
+ "loss": 1.6522,
243
+ "step": 390
244
+ },
245
+ {
246
+ "epoch": 0.79,
247
+ "learning_rate": 4.083210603829161e-05,
248
+ "loss": 1.5982,
249
+ "step": 400
250
+ },
251
+ {
252
+ "epoch": 0.81,
253
+ "learning_rate": 4.0463917525773195e-05,
254
+ "loss": 1.5984,
255
+ "step": 410
256
+ },
257
+ {
258
+ "epoch": 0.83,
259
+ "learning_rate": 4.009572901325479e-05,
260
+ "loss": 1.5961,
261
+ "step": 420
262
+ },
263
+ {
264
+ "epoch": 0.85,
265
+ "learning_rate": 3.972754050073638e-05,
266
+ "loss": 1.6263,
267
+ "step": 430
268
+ },
269
+ {
270
+ "epoch": 0.87,
271
+ "learning_rate": 3.935935198821797e-05,
272
+ "loss": 1.5302,
273
+ "step": 440
274
+ },
275
+ {
276
+ "epoch": 0.89,
277
+ "learning_rate": 3.899116347569956e-05,
278
+ "loss": 1.5545,
279
+ "step": 450
280
+ },
281
+ {
282
+ "epoch": 0.91,
283
+ "learning_rate": 3.862297496318115e-05,
284
+ "loss": 1.5918,
285
+ "step": 460
286
+ },
287
+ {
288
+ "epoch": 0.93,
289
+ "learning_rate": 3.825478645066274e-05,
290
+ "loss": 1.539,
291
+ "step": 470
292
+ },
293
+ {
294
+ "epoch": 0.95,
295
+ "learning_rate": 3.7886597938144333e-05,
296
+ "loss": 1.5363,
297
+ "step": 480
298
+ },
299
+ {
300
+ "epoch": 0.97,
301
+ "learning_rate": 3.7518409425625926e-05,
302
+ "loss": 1.5359,
303
+ "step": 490
304
+ },
305
+ {
306
+ "epoch": 0.99,
307
+ "learning_rate": 3.715022091310751e-05,
308
+ "loss": 1.4777,
309
+ "step": 500
310
+ },
311
+ {
312
+ "epoch": 1.0,
313
+ "eval_accuracy": 0.7062395494147672,
314
+ "eval_loss": 1.1435766220092773,
315
+ "eval_runtime": 107.4159,
316
+ "eval_samples_per_second": 105.785,
317
+ "eval_steps_per_second": 3.314,
318
+ "step": 503
319
+ },
320
+ {
321
+ "epoch": 1.01,
322
+ "learning_rate": 3.67820324005891e-05,
323
+ "loss": 1.492,
324
+ "step": 510
325
+ },
326
+ {
327
+ "epoch": 1.03,
328
+ "learning_rate": 3.6413843888070695e-05,
329
+ "loss": 1.4226,
330
+ "step": 520
331
+ },
332
+ {
333
+ "epoch": 1.05,
334
+ "learning_rate": 3.604565537555229e-05,
335
+ "loss": 1.3538,
336
+ "step": 530
337
+ },
338
+ {
339
+ "epoch": 1.07,
340
+ "learning_rate": 3.567746686303387e-05,
341
+ "loss": 1.3888,
342
+ "step": 540
343
+ },
344
+ {
345
+ "epoch": 1.09,
346
+ "learning_rate": 3.5309278350515465e-05,
347
+ "loss": 1.3147,
348
+ "step": 550
349
+ },
350
+ {
351
+ "epoch": 1.11,
352
+ "learning_rate": 3.494108983799706e-05,
353
+ "loss": 1.414,
354
+ "step": 560
355
+ },
356
+ {
357
+ "epoch": 1.13,
358
+ "learning_rate": 3.457290132547865e-05,
359
+ "loss": 1.2426,
360
+ "step": 570
361
+ },
362
+ {
363
+ "epoch": 1.15,
364
+ "learning_rate": 3.4204712812960235e-05,
365
+ "loss": 1.3277,
366
+ "step": 580
367
+ },
368
+ {
369
+ "epoch": 1.17,
370
+ "learning_rate": 3.383652430044183e-05,
371
+ "loss": 1.3334,
372
+ "step": 590
373
+ },
374
+ {
375
+ "epoch": 1.19,
376
+ "learning_rate": 3.346833578792342e-05,
377
+ "loss": 1.3156,
378
+ "step": 600
379
+ },
380
+ {
381
+ "epoch": 1.21,
382
+ "learning_rate": 3.310014727540501e-05,
383
+ "loss": 1.2929,
384
+ "step": 610
385
+ },
386
+ {
387
+ "epoch": 1.23,
388
+ "learning_rate": 3.2731958762886596e-05,
389
+ "loss": 1.274,
390
+ "step": 620
391
+ },
392
+ {
393
+ "epoch": 1.25,
394
+ "learning_rate": 3.236377025036819e-05,
395
+ "loss": 1.3538,
396
+ "step": 630
397
+ },
398
+ {
399
+ "epoch": 1.27,
400
+ "learning_rate": 3.199558173784978e-05,
401
+ "loss": 1.3097,
402
+ "step": 640
403
+ },
404
+ {
405
+ "epoch": 1.29,
406
+ "learning_rate": 3.162739322533137e-05,
407
+ "loss": 1.3836,
408
+ "step": 650
409
+ },
410
+ {
411
+ "epoch": 1.31,
412
+ "learning_rate": 3.1259204712812965e-05,
413
+ "loss": 1.2941,
414
+ "step": 660
415
+ },
416
+ {
417
+ "epoch": 1.33,
418
+ "learning_rate": 3.089101620029455e-05,
419
+ "loss": 1.3041,
420
+ "step": 670
421
+ },
422
+ {
423
+ "epoch": 1.35,
424
+ "learning_rate": 3.052282768777614e-05,
425
+ "loss": 1.35,
426
+ "step": 680
427
+ },
428
+ {
429
+ "epoch": 1.37,
430
+ "learning_rate": 3.015463917525773e-05,
431
+ "loss": 1.2847,
432
+ "step": 690
433
+ },
434
+ {
435
+ "epoch": 1.39,
436
+ "learning_rate": 2.9786450662739324e-05,
437
+ "loss": 1.3606,
438
+ "step": 700
439
+ },
440
+ {
441
+ "epoch": 1.41,
442
+ "learning_rate": 2.9418262150220916e-05,
443
+ "loss": 1.2239,
444
+ "step": 710
445
+ },
446
+ {
447
+ "epoch": 1.43,
448
+ "learning_rate": 2.9050073637702508e-05,
449
+ "loss": 1.2794,
450
+ "step": 720
451
+ },
452
+ {
453
+ "epoch": 1.45,
454
+ "learning_rate": 2.8681885125184093e-05,
455
+ "loss": 1.2988,
456
+ "step": 730
457
+ },
458
+ {
459
+ "epoch": 1.47,
460
+ "learning_rate": 2.8313696612665685e-05,
461
+ "loss": 1.2714,
462
+ "step": 740
463
+ },
464
+ {
465
+ "epoch": 1.49,
466
+ "learning_rate": 2.7945508100147278e-05,
467
+ "loss": 1.2118,
468
+ "step": 750
469
+ },
470
+ {
471
+ "epoch": 1.51,
472
+ "learning_rate": 2.757731958762887e-05,
473
+ "loss": 1.2792,
474
+ "step": 760
475
+ },
476
+ {
477
+ "epoch": 1.53,
478
+ "learning_rate": 2.7209131075110455e-05,
479
+ "loss": 1.3307,
480
+ "step": 770
481
+ },
482
+ {
483
+ "epoch": 1.55,
484
+ "learning_rate": 2.6840942562592047e-05,
485
+ "loss": 1.2683,
486
+ "step": 780
487
+ },
488
+ {
489
+ "epoch": 1.57,
490
+ "learning_rate": 2.647275405007364e-05,
491
+ "loss": 1.2789,
492
+ "step": 790
493
+ },
494
+ {
495
+ "epoch": 1.59,
496
+ "learning_rate": 2.610456553755523e-05,
497
+ "loss": 1.261,
498
+ "step": 800
499
+ },
500
+ {
501
+ "epoch": 1.61,
502
+ "learning_rate": 2.5736377025036817e-05,
503
+ "loss": 1.3408,
504
+ "step": 810
505
+ },
506
+ {
507
+ "epoch": 1.63,
508
+ "learning_rate": 2.536818851251841e-05,
509
+ "loss": 1.27,
510
+ "step": 820
511
+ },
512
+ {
513
+ "epoch": 1.65,
514
+ "learning_rate": 2.5e-05,
515
+ "loss": 1.1915,
516
+ "step": 830
517
+ },
518
+ {
519
+ "epoch": 1.67,
520
+ "learning_rate": 2.4631811487481593e-05,
521
+ "loss": 1.1396,
522
+ "step": 840
523
+ },
524
+ {
525
+ "epoch": 1.69,
526
+ "learning_rate": 2.4263622974963182e-05,
527
+ "loss": 1.1942,
528
+ "step": 850
529
+ },
530
+ {
531
+ "epoch": 1.71,
532
+ "learning_rate": 2.3895434462444774e-05,
533
+ "loss": 1.1793,
534
+ "step": 860
535
+ },
536
+ {
537
+ "epoch": 1.73,
538
+ "learning_rate": 2.3527245949926363e-05,
539
+ "loss": 1.192,
540
+ "step": 870
541
+ },
542
+ {
543
+ "epoch": 1.75,
544
+ "learning_rate": 2.3159057437407955e-05,
545
+ "loss": 1.2951,
546
+ "step": 880
547
+ },
548
+ {
549
+ "epoch": 1.77,
550
+ "learning_rate": 2.2790868924889544e-05,
551
+ "loss": 1.2129,
552
+ "step": 890
553
+ },
554
+ {
555
+ "epoch": 1.79,
556
+ "learning_rate": 2.2422680412371136e-05,
557
+ "loss": 1.192,
558
+ "step": 900
559
+ },
560
+ {
561
+ "epoch": 1.81,
562
+ "learning_rate": 2.2054491899852725e-05,
563
+ "loss": 1.2012,
564
+ "step": 910
565
+ },
566
+ {
567
+ "epoch": 1.83,
568
+ "learning_rate": 2.1686303387334317e-05,
569
+ "loss": 1.2543,
570
+ "step": 920
571
+ },
572
+ {
573
+ "epoch": 1.85,
574
+ "learning_rate": 2.1318114874815906e-05,
575
+ "loss": 1.2318,
576
+ "step": 930
577
+ },
578
+ {
579
+ "epoch": 1.87,
580
+ "learning_rate": 2.0949926362297498e-05,
581
+ "loss": 1.223,
582
+ "step": 940
583
+ },
584
+ {
585
+ "epoch": 1.89,
586
+ "learning_rate": 2.0581737849779087e-05,
587
+ "loss": 1.2347,
588
+ "step": 950
589
+ },
590
+ {
591
+ "epoch": 1.91,
592
+ "learning_rate": 2.021354933726068e-05,
593
+ "loss": 1.2412,
594
+ "step": 960
595
+ },
596
+ {
597
+ "epoch": 1.93,
598
+ "learning_rate": 1.9845360824742268e-05,
599
+ "loss": 1.1686,
600
+ "step": 970
601
+ },
602
+ {
603
+ "epoch": 1.95,
604
+ "learning_rate": 1.947717231222386e-05,
605
+ "loss": 1.1424,
606
+ "step": 980
607
+ },
608
+ {
609
+ "epoch": 1.97,
610
+ "learning_rate": 1.910898379970545e-05,
611
+ "loss": 1.2094,
612
+ "step": 990
613
+ },
614
+ {
615
+ "epoch": 1.99,
616
+ "learning_rate": 1.874079528718704e-05,
617
+ "loss": 1.2418,
618
+ "step": 1000
619
+ },
620
+ {
621
+ "epoch": 2.0,
622
+ "eval_accuracy": 0.7571063979582857,
623
+ "eval_loss": 0.9226512908935547,
624
+ "eval_runtime": 106.3435,
625
+ "eval_samples_per_second": 106.852,
626
+ "eval_steps_per_second": 3.348,
627
+ "step": 1006
628
+ },
629
+ {
630
+ "epoch": 2.01,
631
+ "learning_rate": 1.8372606774668633e-05,
632
+ "loss": 1.2256,
633
+ "step": 1010
634
+ },
635
+ {
636
+ "epoch": 2.03,
637
+ "learning_rate": 1.800441826215022e-05,
638
+ "loss": 1.1317,
639
+ "step": 1020
640
+ },
641
+ {
642
+ "epoch": 2.05,
643
+ "learning_rate": 1.7636229749631814e-05,
644
+ "loss": 1.1062,
645
+ "step": 1030
646
+ },
647
+ {
648
+ "epoch": 2.07,
649
+ "learning_rate": 1.7268041237113403e-05,
650
+ "loss": 1.1442,
651
+ "step": 1040
652
+ },
653
+ {
654
+ "epoch": 2.09,
655
+ "learning_rate": 1.6899852724594995e-05,
656
+ "loss": 1.0836,
657
+ "step": 1050
658
+ },
659
+ {
660
+ "epoch": 2.11,
661
+ "learning_rate": 1.6531664212076583e-05,
662
+ "loss": 1.1139,
663
+ "step": 1060
664
+ },
665
+ {
666
+ "epoch": 2.13,
667
+ "learning_rate": 1.6163475699558176e-05,
668
+ "loss": 1.0567,
669
+ "step": 1070
670
+ },
671
+ {
672
+ "epoch": 2.15,
673
+ "learning_rate": 1.5795287187039764e-05,
674
+ "loss": 1.0716,
675
+ "step": 1080
676
+ },
677
+ {
678
+ "epoch": 2.17,
679
+ "learning_rate": 1.5427098674521357e-05,
680
+ "loss": 1.1631,
681
+ "step": 1090
682
+ },
683
+ {
684
+ "epoch": 2.19,
685
+ "learning_rate": 1.5058910162002945e-05,
686
+ "loss": 1.0727,
687
+ "step": 1100
688
+ },
689
+ {
690
+ "epoch": 2.21,
691
+ "learning_rate": 1.4690721649484537e-05,
692
+ "loss": 1.1132,
693
+ "step": 1110
694
+ },
695
+ {
696
+ "epoch": 2.23,
697
+ "learning_rate": 1.4322533136966126e-05,
698
+ "loss": 1.0497,
699
+ "step": 1120
700
+ },
701
+ {
702
+ "epoch": 2.25,
703
+ "learning_rate": 1.3954344624447718e-05,
704
+ "loss": 1.1202,
705
+ "step": 1130
706
+ },
707
+ {
708
+ "epoch": 2.27,
709
+ "learning_rate": 1.3586156111929307e-05,
710
+ "loss": 1.0817,
711
+ "step": 1140
712
+ },
713
+ {
714
+ "epoch": 2.29,
715
+ "learning_rate": 1.32179675994109e-05,
716
+ "loss": 1.1048,
717
+ "step": 1150
718
+ },
719
+ {
720
+ "epoch": 2.31,
721
+ "learning_rate": 1.2849779086892488e-05,
722
+ "loss": 1.1784,
723
+ "step": 1160
724
+ },
725
+ {
726
+ "epoch": 2.32,
727
+ "learning_rate": 1.248159057437408e-05,
728
+ "loss": 1.08,
729
+ "step": 1170
730
+ },
731
+ {
732
+ "epoch": 2.34,
733
+ "learning_rate": 1.211340206185567e-05,
734
+ "loss": 1.1461,
735
+ "step": 1180
736
+ },
737
+ {
738
+ "epoch": 2.36,
739
+ "learning_rate": 1.1745213549337261e-05,
740
+ "loss": 1.0777,
741
+ "step": 1190
742
+ },
743
+ {
744
+ "epoch": 2.38,
745
+ "learning_rate": 1.1377025036818852e-05,
746
+ "loss": 1.0184,
747
+ "step": 1200
748
+ },
749
+ {
750
+ "epoch": 2.4,
751
+ "learning_rate": 1.1008836524300442e-05,
752
+ "loss": 1.0455,
753
+ "step": 1210
754
+ },
755
+ {
756
+ "epoch": 2.42,
757
+ "learning_rate": 1.0640648011782033e-05,
758
+ "loss": 1.095,
759
+ "step": 1220
760
+ },
761
+ {
762
+ "epoch": 2.44,
763
+ "learning_rate": 1.0272459499263623e-05,
764
+ "loss": 1.1394,
765
+ "step": 1230
766
+ },
767
+ {
768
+ "epoch": 2.46,
769
+ "learning_rate": 9.904270986745213e-06,
770
+ "loss": 1.1076,
771
+ "step": 1240
772
+ },
773
+ {
774
+ "epoch": 2.48,
775
+ "learning_rate": 9.536082474226804e-06,
776
+ "loss": 1.0063,
777
+ "step": 1250
778
+ },
779
+ {
780
+ "epoch": 2.5,
781
+ "learning_rate": 9.167893961708394e-06,
782
+ "loss": 1.1044,
783
+ "step": 1260
784
+ },
785
+ {
786
+ "epoch": 2.52,
787
+ "learning_rate": 8.799705449189986e-06,
788
+ "loss": 1.041,
789
+ "step": 1270
790
+ },
791
+ {
792
+ "epoch": 2.54,
793
+ "learning_rate": 8.431516936671577e-06,
794
+ "loss": 1.0551,
795
+ "step": 1280
796
+ },
797
+ {
798
+ "epoch": 2.56,
799
+ "learning_rate": 8.063328424153167e-06,
800
+ "loss": 1.0601,
801
+ "step": 1290
802
+ },
803
+ {
804
+ "epoch": 2.58,
805
+ "learning_rate": 7.695139911634758e-06,
806
+ "loss": 1.1787,
807
+ "step": 1300
808
+ },
809
+ {
810
+ "epoch": 2.6,
811
+ "learning_rate": 7.326951399116348e-06,
812
+ "loss": 1.0445,
813
+ "step": 1310
814
+ },
815
+ {
816
+ "epoch": 2.62,
817
+ "learning_rate": 6.958762886597939e-06,
818
+ "loss": 1.105,
819
+ "step": 1320
820
+ },
821
+ {
822
+ "epoch": 2.64,
823
+ "learning_rate": 6.590574374079529e-06,
824
+ "loss": 1.0418,
825
+ "step": 1330
826
+ },
827
+ {
828
+ "epoch": 2.66,
829
+ "learning_rate": 6.22238586156112e-06,
830
+ "loss": 0.9967,
831
+ "step": 1340
832
+ },
833
+ {
834
+ "epoch": 2.68,
835
+ "learning_rate": 5.85419734904271e-06,
836
+ "loss": 1.0506,
837
+ "step": 1350
838
+ },
839
+ {
840
+ "epoch": 2.7,
841
+ "learning_rate": 5.486008836524301e-06,
842
+ "loss": 1.1154,
843
+ "step": 1360
844
+ },
845
+ {
846
+ "epoch": 2.72,
847
+ "learning_rate": 5.117820324005891e-06,
848
+ "loss": 1.058,
849
+ "step": 1370
850
+ },
851
+ {
852
+ "epoch": 2.74,
853
+ "learning_rate": 4.7496318114874815e-06,
854
+ "loss": 1.0997,
855
+ "step": 1380
856
+ },
857
+ {
858
+ "epoch": 2.76,
859
+ "learning_rate": 4.381443298969072e-06,
860
+ "loss": 1.0751,
861
+ "step": 1390
862
+ },
863
+ {
864
+ "epoch": 2.78,
865
+ "learning_rate": 4.013254786450663e-06,
866
+ "loss": 1.0658,
867
+ "step": 1400
868
+ },
869
+ {
870
+ "epoch": 2.8,
871
+ "learning_rate": 3.6450662739322538e-06,
872
+ "loss": 1.0243,
873
+ "step": 1410
874
+ },
875
+ {
876
+ "epoch": 2.82,
877
+ "learning_rate": 3.2768777614138442e-06,
878
+ "loss": 1.003,
879
+ "step": 1420
880
+ },
881
+ {
882
+ "epoch": 2.84,
883
+ "learning_rate": 2.9086892488954347e-06,
884
+ "loss": 1.0251,
885
+ "step": 1430
886
+ },
887
+ {
888
+ "epoch": 2.86,
889
+ "learning_rate": 2.540500736377025e-06,
890
+ "loss": 1.0049,
891
+ "step": 1440
892
+ },
893
+ {
894
+ "epoch": 2.88,
895
+ "learning_rate": 2.1723122238586156e-06,
896
+ "loss": 1.1317,
897
+ "step": 1450
898
+ },
899
+ {
900
+ "epoch": 2.9,
901
+ "learning_rate": 1.804123711340206e-06,
902
+ "loss": 1.063,
903
+ "step": 1460
904
+ },
905
+ {
906
+ "epoch": 2.92,
907
+ "learning_rate": 1.4359351988217967e-06,
908
+ "loss": 1.0558,
909
+ "step": 1470
910
+ },
911
+ {
912
+ "epoch": 2.94,
913
+ "learning_rate": 1.0677466863033874e-06,
914
+ "loss": 1.0737,
915
+ "step": 1480
916
+ },
917
+ {
918
+ "epoch": 2.96,
919
+ "learning_rate": 6.99558173784978e-07,
920
+ "loss": 1.0705,
921
+ "step": 1490
922
+ },
923
+ {
924
+ "epoch": 2.98,
925
+ "learning_rate": 3.313696612665685e-07,
926
+ "loss": 1.0657,
927
+ "step": 1500
928
+ },
929
+ {
930
+ "epoch": 3.0,
931
+ "eval_accuracy": 0.7760274575376221,
932
+ "eval_loss": 0.8486079573631287,
933
+ "eval_runtime": 106.6601,
934
+ "eval_samples_per_second": 106.535,
935
+ "eval_steps_per_second": 3.338,
936
+ "step": 1509
937
+ },
938
+ {
939
+ "epoch": 3.0,
940
+ "step": 1509,
941
+ "total_flos": 4.812819303609225e+18,
942
+ "train_loss": 1.6366877293570932,
943
+ "train_runtime": 3640.8494,
944
+ "train_samples_per_second": 53.054,
945
+ "train_steps_per_second": 0.414
946
+ },
947
+ {
948
+ "epoch": 3.0,
949
+ "eval_accuracy": 0.7760274575376221,
950
+ "eval_loss": 0.8486079573631287,
951
+ "eval_runtime": 130.1364,
952
+ "eval_samples_per_second": 87.316,
953
+ "eval_steps_per_second": 2.736,
954
+ "step": 1509
955
+ }
956
+ ],
957
+ "logging_steps": 10,
958
+ "max_steps": 1509,
959
+ "num_train_epochs": 3,
960
+ "save_steps": 500,
961
+ "total_flos": 4.812819303609225e+18,
962
+ "trial_name": null,
963
+ "trial_params": null
964
+ }