dima806 committed on
Commit
4d7cca1
1 Parent(s): c3c39be

Upload folder using huggingface_hub

Browse files
checkpoint-15680/config.json ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/vit-base-patch16-224-in21k",
3
+ "architectures": [
4
+ "ViTForImageClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.0,
7
+ "encoder_stride": 16,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.0,
10
+ "hidden_size": 768,
11
+ "id2label": {
12
+ "0": "01",
13
+ "1": "02",
14
+ "2": "03",
15
+ "3": "04",
16
+ "4": "05",
17
+ "5": "06-07",
18
+ "6": "08-09",
19
+ "7": "10-12",
20
+ "8": "13-15",
21
+ "9": "16-20",
22
+ "10": "21-25",
23
+ "11": "26-30",
24
+ "12": "31-35",
25
+ "13": "36-40",
26
+ "14": "41-45",
27
+ "15": "46-50",
28
+ "16": "51-55",
29
+ "17": "56-60",
30
+ "18": "61-65",
31
+ "19": "66-70",
32
+ "20": "71-80",
33
+ "21": "81-90",
34
+ "22": "90+"
35
+ },
36
+ "image_size": 224,
37
+ "initializer_range": 0.02,
38
+ "intermediate_size": 3072,
39
+ "label2id": {
40
+ "01": 0,
41
+ "02": 1,
42
+ "03": 2,
43
+ "04": 3,
44
+ "05": 4,
45
+ "06-07": 5,
46
+ "08-09": 6,
47
+ "10-12": 7,
48
+ "13-15": 8,
49
+ "16-20": 9,
50
+ "21-25": 10,
51
+ "26-30": 11,
52
+ "31-35": 12,
53
+ "36-40": 13,
54
+ "41-45": 14,
55
+ "46-50": 15,
56
+ "51-55": 16,
57
+ "56-60": 17,
58
+ "61-65": 18,
59
+ "66-70": 19,
60
+ "71-80": 20,
61
+ "81-90": 21,
62
+ "90+": 22
63
+ },
64
+ "layer_norm_eps": 1e-12,
65
+ "model_type": "vit",
66
+ "num_attention_heads": 12,
67
+ "num_channels": 3,
68
+ "num_hidden_layers": 12,
69
+ "patch_size": 16,
70
+ "problem_type": "single_label_classification",
71
+ "qkv_bias": true,
72
+ "torch_dtype": "float32",
73
+ "transformers_version": "4.35.0"
74
+ }
checkpoint-15680/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a63ea460c98a52e8bcfec2f3c809237b00fc0d2b500c3c22931b40d44716a89
3
+ size 343288572
checkpoint-15680/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e35ffa8d8551b558989b2cdc71698b325ca62bef53a45aa68451429b6431648
3
+ size 686697605
checkpoint-15680/preprocessor_config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "do_rescale": true,
4
+ "do_resize": true,
5
+ "image_mean": [
6
+ 0.5,
7
+ 0.5,
8
+ 0.5
9
+ ],
10
+ "image_processor_type": "ViTImageProcessor",
11
+ "image_std": [
12
+ 0.5,
13
+ 0.5,
14
+ 0.5
15
+ ],
16
+ "resample": 2,
17
+ "rescale_factor": 0.00392156862745098,
18
+ "size": {
19
+ "height": 224,
20
+ "width": 224
21
+ }
22
+ }
checkpoint-15680/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af0eb2d80c5b666462bdf4036f202d6bf53049dd371621d2c5d88b2d37f2e8f9
3
+ size 14575
checkpoint-15680/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9bda6364fb66beb98cceb41a2941e9310c217422f1c6a27aee2084e99292bcea
3
+ size 627
checkpoint-15680/trainer_state.json ADDED
@@ -0,0 +1,646 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 1.5390384197235107,
3
+ "best_model_checkpoint": "facial_age_image_detection/checkpoint-15680",
4
+ "epoch": 49.0,
5
+ "eval_steps": 500,
6
+ "global_step": 15680,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "eval_accuracy": 0.22556684910086006,
14
+ "eval_loss": 2.945213556289673,
15
+ "eval_runtime": 51.2577,
16
+ "eval_samples_per_second": 99.809,
17
+ "eval_steps_per_second": 3.121,
18
+ "step": 320
19
+ },
20
+ {
21
+ "epoch": 1.56,
22
+ "learning_rate": 2.915360501567398e-06,
23
+ "loss": 2.913,
24
+ "step": 500
25
+ },
26
+ {
27
+ "epoch": 2.0,
28
+ "eval_accuracy": 0.2580140734949179,
29
+ "eval_loss": 2.7111563682556152,
30
+ "eval_runtime": 50.9659,
31
+ "eval_samples_per_second": 100.381,
32
+ "eval_steps_per_second": 3.139,
33
+ "step": 640
34
+ },
35
+ {
36
+ "epoch": 3.0,
37
+ "eval_accuracy": 0.2922204847537138,
38
+ "eval_loss": 2.557264804840088,
39
+ "eval_runtime": 51.3801,
40
+ "eval_samples_per_second": 99.572,
41
+ "eval_steps_per_second": 3.114,
42
+ "step": 960
43
+ },
44
+ {
45
+ "epoch": 3.12,
46
+ "learning_rate": 2.821316614420063e-06,
47
+ "loss": 2.4834,
48
+ "step": 1000
49
+ },
50
+ {
51
+ "epoch": 4.0,
52
+ "eval_accuracy": 0.3027756059421423,
53
+ "eval_loss": 2.448474645614624,
54
+ "eval_runtime": 51.2195,
55
+ "eval_samples_per_second": 99.884,
56
+ "eval_steps_per_second": 3.124,
57
+ "step": 1280
58
+ },
59
+ {
60
+ "epoch": 4.69,
61
+ "learning_rate": 2.7272727272727272e-06,
62
+ "loss": 2.246,
63
+ "step": 1500
64
+ },
65
+ {
66
+ "epoch": 5.0,
67
+ "eval_accuracy": 0.31841282251759184,
68
+ "eval_loss": 2.3704957962036133,
69
+ "eval_runtime": 51.7745,
70
+ "eval_samples_per_second": 98.813,
71
+ "eval_steps_per_second": 3.09,
72
+ "step": 1600
73
+ },
74
+ {
75
+ "epoch": 6.0,
76
+ "eval_accuracy": 0.3348318999218139,
77
+ "eval_loss": 2.298917055130005,
78
+ "eval_runtime": 51.1066,
79
+ "eval_samples_per_second": 100.104,
80
+ "eval_steps_per_second": 3.131,
81
+ "step": 1920
82
+ },
83
+ {
84
+ "epoch": 6.25,
85
+ "learning_rate": 2.633228840125392e-06,
86
+ "loss": 2.1013,
87
+ "step": 2000
88
+ },
89
+ {
90
+ "epoch": 7.0,
91
+ "eval_accuracy": 0.3336591086786552,
92
+ "eval_loss": 2.2518346309661865,
93
+ "eval_runtime": 51.0284,
94
+ "eval_samples_per_second": 100.258,
95
+ "eval_steps_per_second": 3.136,
96
+ "step": 2240
97
+ },
98
+ {
99
+ "epoch": 7.81,
100
+ "learning_rate": 2.5391849529780565e-06,
101
+ "loss": 1.992,
102
+ "step": 2500
103
+ },
104
+ {
105
+ "epoch": 8.0,
106
+ "eval_accuracy": 0.3506645817044566,
107
+ "eval_loss": 2.195549249649048,
108
+ "eval_runtime": 50.666,
109
+ "eval_samples_per_second": 100.975,
110
+ "eval_steps_per_second": 3.158,
111
+ "step": 2560
112
+ },
113
+ {
114
+ "epoch": 9.0,
115
+ "eval_accuracy": 0.3596559812353401,
116
+ "eval_loss": 2.1464827060699463,
117
+ "eval_runtime": 51.2548,
118
+ "eval_samples_per_second": 99.815,
119
+ "eval_steps_per_second": 3.122,
120
+ "step": 2880
121
+ },
122
+ {
123
+ "epoch": 9.38,
124
+ "learning_rate": 2.445141065830721e-06,
125
+ "loss": 1.9057,
126
+ "step": 3000
127
+ },
128
+ {
129
+ "epoch": 10.0,
130
+ "eval_accuracy": 0.3844800625488663,
131
+ "eval_loss": 2.097637176513672,
132
+ "eval_runtime": 51.7915,
133
+ "eval_samples_per_second": 98.781,
134
+ "eval_steps_per_second": 3.089,
135
+ "step": 3200
136
+ },
137
+ {
138
+ "epoch": 10.94,
139
+ "learning_rate": 2.3510971786833857e-06,
140
+ "loss": 1.8272,
141
+ "step": 3500
142
+ },
143
+ {
144
+ "epoch": 11.0,
145
+ "eval_accuracy": 0.3784206411258796,
146
+ "eval_loss": 2.0678927898406982,
147
+ "eval_runtime": 52.5229,
148
+ "eval_samples_per_second": 97.405,
149
+ "eval_steps_per_second": 3.046,
150
+ "step": 3520
151
+ },
152
+ {
153
+ "epoch": 12.0,
154
+ "eval_accuracy": 0.388975762314308,
155
+ "eval_loss": 2.0365805625915527,
156
+ "eval_runtime": 52.5303,
157
+ "eval_samples_per_second": 97.391,
158
+ "eval_steps_per_second": 3.046,
159
+ "step": 3840
160
+ },
161
+ {
162
+ "epoch": 12.5,
163
+ "learning_rate": 2.2570532915360503e-06,
164
+ "loss": 1.7607,
165
+ "step": 4000
166
+ },
167
+ {
168
+ "epoch": 13.0,
169
+ "eval_accuracy": 0.4012900703674746,
170
+ "eval_loss": 1.9932725429534912,
171
+ "eval_runtime": 51.4196,
172
+ "eval_samples_per_second": 99.495,
173
+ "eval_steps_per_second": 3.112,
174
+ "step": 4160
175
+ },
176
+ {
177
+ "epoch": 14.0,
178
+ "eval_accuracy": 0.39190774042220483,
179
+ "eval_loss": 1.9702576398849487,
180
+ "eval_runtime": 51.6324,
181
+ "eval_samples_per_second": 99.085,
182
+ "eval_steps_per_second": 3.099,
183
+ "step": 4480
184
+ },
185
+ {
186
+ "epoch": 14.06,
187
+ "learning_rate": 2.163009404388715e-06,
188
+ "loss": 1.7033,
189
+ "step": 4500
190
+ },
191
+ {
192
+ "epoch": 15.0,
193
+ "eval_accuracy": 0.39874902267396406,
194
+ "eval_loss": 1.9390950202941895,
195
+ "eval_runtime": 51.5713,
196
+ "eval_samples_per_second": 99.203,
197
+ "eval_steps_per_second": 3.103,
198
+ "step": 4800
199
+ },
200
+ {
201
+ "epoch": 15.62,
202
+ "learning_rate": 2.0689655172413796e-06,
203
+ "loss": 1.6414,
204
+ "step": 5000
205
+ },
206
+ {
207
+ "epoch": 16.0,
208
+ "eval_accuracy": 0.40187646598905397,
209
+ "eval_loss": 1.914416790008545,
210
+ "eval_runtime": 51.2253,
211
+ "eval_samples_per_second": 99.872,
212
+ "eval_steps_per_second": 3.123,
213
+ "step": 5120
214
+ },
215
+ {
216
+ "epoch": 17.0,
217
+ "eval_accuracy": 0.405199374511337,
218
+ "eval_loss": 1.8871612548828125,
219
+ "eval_runtime": 51.1314,
220
+ "eval_samples_per_second": 100.056,
221
+ "eval_steps_per_second": 3.129,
222
+ "step": 5440
223
+ },
224
+ {
225
+ "epoch": 17.19,
226
+ "learning_rate": 1.9749216300940438e-06,
227
+ "loss": 1.5939,
228
+ "step": 5500
229
+ },
230
+ {
231
+ "epoch": 18.0,
232
+ "eval_accuracy": 0.40852228303362004,
233
+ "eval_loss": 1.8684996366500854,
234
+ "eval_runtime": 52.2991,
235
+ "eval_samples_per_second": 97.822,
236
+ "eval_steps_per_second": 3.059,
237
+ "step": 5760
238
+ },
239
+ {
240
+ "epoch": 18.75,
241
+ "learning_rate": 1.8808777429467086e-06,
242
+ "loss": 1.5461,
243
+ "step": 6000
244
+ },
245
+ {
246
+ "epoch": 19.0,
247
+ "eval_accuracy": 0.4161454261141517,
248
+ "eval_loss": 1.842322587966919,
249
+ "eval_runtime": 51.5547,
250
+ "eval_samples_per_second": 99.234,
251
+ "eval_steps_per_second": 3.104,
252
+ "step": 6080
253
+ },
254
+ {
255
+ "epoch": 20.0,
256
+ "eval_accuracy": 0.41575449569976547,
257
+ "eval_loss": 1.8240511417388916,
258
+ "eval_runtime": 51.0599,
259
+ "eval_samples_per_second": 100.196,
260
+ "eval_steps_per_second": 3.134,
261
+ "step": 6400
262
+ },
263
+ {
264
+ "epoch": 20.31,
265
+ "learning_rate": 1.786833855799373e-06,
266
+ "loss": 1.5016,
267
+ "step": 6500
268
+ },
269
+ {
270
+ "epoch": 21.0,
271
+ "eval_accuracy": 0.4245504300234558,
272
+ "eval_loss": 1.7987163066864014,
273
+ "eval_runtime": 52.1072,
274
+ "eval_samples_per_second": 98.182,
275
+ "eval_steps_per_second": 3.071,
276
+ "step": 6720
277
+ },
278
+ {
279
+ "epoch": 21.88,
280
+ "learning_rate": 1.6927899686520374e-06,
281
+ "loss": 1.4636,
282
+ "step": 7000
283
+ },
284
+ {
285
+ "epoch": 22.0,
286
+ "eval_accuracy": 0.42865519937451135,
287
+ "eval_loss": 1.777077078819275,
288
+ "eval_runtime": 51.2408,
289
+ "eval_samples_per_second": 99.842,
290
+ "eval_steps_per_second": 3.123,
291
+ "step": 7040
292
+ },
293
+ {
294
+ "epoch": 23.0,
295
+ "eval_accuracy": 0.4292415949960907,
296
+ "eval_loss": 1.7641410827636719,
297
+ "eval_runtime": 51.4723,
298
+ "eval_samples_per_second": 99.393,
299
+ "eval_steps_per_second": 3.108,
300
+ "step": 7360
301
+ },
302
+ {
303
+ "epoch": 23.44,
304
+ "learning_rate": 1.5987460815047023e-06,
305
+ "loss": 1.4204,
306
+ "step": 7500
307
+ },
308
+ {
309
+ "epoch": 24.0,
310
+ "eval_accuracy": 0.4462470680218921,
311
+ "eval_loss": 1.7357829809188843,
312
+ "eval_runtime": 51.0544,
313
+ "eval_samples_per_second": 100.207,
314
+ "eval_steps_per_second": 3.134,
315
+ "step": 7680
316
+ },
317
+ {
318
+ "epoch": 25.0,
319
+ "learning_rate": 1.5047021943573667e-06,
320
+ "loss": 1.3845,
321
+ "step": 8000
322
+ },
323
+ {
324
+ "epoch": 25.0,
325
+ "eval_accuracy": 0.4472243940578577,
326
+ "eval_loss": 1.7216110229492188,
327
+ "eval_runtime": 51.2122,
328
+ "eval_samples_per_second": 99.898,
329
+ "eval_steps_per_second": 3.124,
330
+ "step": 8000
331
+ },
332
+ {
333
+ "epoch": 26.0,
334
+ "eval_accuracy": 0.45054730258014075,
335
+ "eval_loss": 1.7076504230499268,
336
+ "eval_runtime": 51.4157,
337
+ "eval_samples_per_second": 99.503,
338
+ "eval_steps_per_second": 3.112,
339
+ "step": 8320
340
+ },
341
+ {
342
+ "epoch": 26.56,
343
+ "learning_rate": 1.4106583072100315e-06,
344
+ "loss": 1.3521,
345
+ "step": 8500
346
+ },
347
+ {
348
+ "epoch": 27.0,
349
+ "eval_accuracy": 0.45347928068803756,
350
+ "eval_loss": 1.6927095651626587,
351
+ "eval_runtime": 51.5458,
352
+ "eval_samples_per_second": 99.252,
353
+ "eval_steps_per_second": 3.104,
354
+ "step": 8640
355
+ },
356
+ {
357
+ "epoch": 28.0,
358
+ "eval_accuracy": 0.45856137607505865,
359
+ "eval_loss": 1.6739999055862427,
360
+ "eval_runtime": 51.4056,
361
+ "eval_samples_per_second": 99.522,
362
+ "eval_steps_per_second": 3.112,
363
+ "step": 8960
364
+ },
365
+ {
366
+ "epoch": 28.12,
367
+ "learning_rate": 1.316614420062696e-06,
368
+ "loss": 1.322,
369
+ "step": 9000
370
+ },
371
+ {
372
+ "epoch": 29.0,
373
+ "eval_accuracy": 0.45269741985926504,
374
+ "eval_loss": 1.6700100898742676,
375
+ "eval_runtime": 51.1233,
376
+ "eval_samples_per_second": 100.072,
377
+ "eval_steps_per_second": 3.13,
378
+ "step": 9280
379
+ },
380
+ {
381
+ "epoch": 29.69,
382
+ "learning_rate": 1.2225705329153605e-06,
383
+ "loss": 1.2923,
384
+ "step": 9500
385
+ },
386
+ {
387
+ "epoch": 30.0,
388
+ "eval_accuracy": 0.46325254104769353,
389
+ "eval_loss": 1.648740291595459,
390
+ "eval_runtime": 52.5113,
391
+ "eval_samples_per_second": 97.427,
392
+ "eval_steps_per_second": 3.047,
393
+ "step": 9600
394
+ },
395
+ {
396
+ "epoch": 31.0,
397
+ "eval_accuracy": 0.4611024237685692,
398
+ "eval_loss": 1.6401363611221313,
399
+ "eval_runtime": 52.7667,
400
+ "eval_samples_per_second": 96.955,
401
+ "eval_steps_per_second": 3.032,
402
+ "step": 9920
403
+ },
404
+ {
405
+ "epoch": 31.25,
406
+ "learning_rate": 1.1285266457680252e-06,
407
+ "loss": 1.2658,
408
+ "step": 10000
409
+ },
410
+ {
411
+ "epoch": 32.0,
412
+ "eval_accuracy": 0.47146207974980453,
413
+ "eval_loss": 1.6254924535751343,
414
+ "eval_runtime": 52.1102,
415
+ "eval_samples_per_second": 98.177,
416
+ "eval_steps_per_second": 3.07,
417
+ "step": 10240
418
+ },
419
+ {
420
+ "epoch": 32.81,
421
+ "learning_rate": 1.0344827586206898e-06,
422
+ "loss": 1.245,
423
+ "step": 10500
424
+ },
425
+ {
426
+ "epoch": 33.0,
427
+ "eval_accuracy": 0.4683346364347146,
428
+ "eval_loss": 1.6232922077178955,
429
+ "eval_runtime": 50.6089,
430
+ "eval_samples_per_second": 101.089,
431
+ "eval_steps_per_second": 3.161,
432
+ "step": 10560
433
+ },
434
+ {
435
+ "epoch": 34.0,
436
+ "eval_accuracy": 0.47439405785770133,
437
+ "eval_loss": 1.6077964305877686,
438
+ "eval_runtime": 53.7216,
439
+ "eval_samples_per_second": 95.232,
440
+ "eval_steps_per_second": 2.978,
441
+ "step": 10880
442
+ },
443
+ {
444
+ "epoch": 34.38,
445
+ "learning_rate": 9.404388714733543e-07,
446
+ "loss": 1.2201,
447
+ "step": 11000
448
+ },
449
+ {
450
+ "epoch": 35.0,
451
+ "eval_accuracy": 0.47537138389366695,
452
+ "eval_loss": 1.5978459119796753,
453
+ "eval_runtime": 51.1056,
454
+ "eval_samples_per_second": 100.107,
455
+ "eval_steps_per_second": 3.131,
456
+ "step": 11200
457
+ },
458
+ {
459
+ "epoch": 35.94,
460
+ "learning_rate": 8.463949843260187e-07,
461
+ "loss": 1.1993,
462
+ "step": 11500
463
+ },
464
+ {
465
+ "epoch": 36.0,
466
+ "eval_accuracy": 0.47556684910086006,
467
+ "eval_loss": 1.5951019525527954,
468
+ "eval_runtime": 51.3095,
469
+ "eval_samples_per_second": 99.709,
470
+ "eval_steps_per_second": 3.118,
471
+ "step": 11520
472
+ },
473
+ {
474
+ "epoch": 37.0,
475
+ "eval_accuracy": 0.4734167318217357,
476
+ "eval_loss": 1.58991539478302,
477
+ "eval_runtime": 50.7545,
478
+ "eval_samples_per_second": 100.799,
479
+ "eval_steps_per_second": 3.152,
480
+ "step": 11840
481
+ },
482
+ {
483
+ "epoch": 37.5,
484
+ "learning_rate": 7.523510971786833e-07,
485
+ "loss": 1.1829,
486
+ "step": 12000
487
+ },
488
+ {
489
+ "epoch": 38.0,
490
+ "eval_accuracy": 0.4794761532447224,
491
+ "eval_loss": 1.5782525539398193,
492
+ "eval_runtime": 51.9408,
493
+ "eval_samples_per_second": 98.497,
494
+ "eval_steps_per_second": 3.08,
495
+ "step": 12160
496
+ },
497
+ {
498
+ "epoch": 39.0,
499
+ "eval_accuracy": 0.48964034401876466,
500
+ "eval_loss": 1.5660046339035034,
501
+ "eval_runtime": 51.4816,
502
+ "eval_samples_per_second": 99.375,
503
+ "eval_steps_per_second": 3.108,
504
+ "step": 12480
505
+ },
506
+ {
507
+ "epoch": 39.06,
508
+ "learning_rate": 6.58307210031348e-07,
509
+ "loss": 1.1693,
510
+ "step": 12500
511
+ },
512
+ {
513
+ "epoch": 40.0,
514
+ "eval_accuracy": 0.4822126661454261,
515
+ "eval_loss": 1.564785122871399,
516
+ "eval_runtime": 51.6845,
517
+ "eval_samples_per_second": 98.985,
518
+ "eval_steps_per_second": 3.096,
519
+ "step": 12800
520
+ },
521
+ {
522
+ "epoch": 40.62,
523
+ "learning_rate": 5.642633228840126e-07,
524
+ "loss": 1.1526,
525
+ "step": 13000
526
+ },
527
+ {
528
+ "epoch": 41.0,
529
+ "eval_accuracy": 0.48377638780297105,
530
+ "eval_loss": 1.5616761445999146,
531
+ "eval_runtime": 51.6796,
532
+ "eval_samples_per_second": 98.995,
533
+ "eval_steps_per_second": 3.096,
534
+ "step": 13120
535
+ },
536
+ {
537
+ "epoch": 42.0,
538
+ "eval_accuracy": 0.4878811571540266,
539
+ "eval_loss": 1.555199384689331,
540
+ "eval_runtime": 51.0688,
541
+ "eval_samples_per_second": 100.178,
542
+ "eval_steps_per_second": 3.133,
543
+ "step": 13440
544
+ },
545
+ {
546
+ "epoch": 42.19,
547
+ "learning_rate": 4.7021943573667715e-07,
548
+ "loss": 1.1422,
549
+ "step": 13500
550
+ },
551
+ {
552
+ "epoch": 43.0,
553
+ "eval_accuracy": 0.4923768569194683,
554
+ "eval_loss": 1.5509822368621826,
555
+ "eval_runtime": 51.2886,
556
+ "eval_samples_per_second": 99.749,
557
+ "eval_steps_per_second": 3.12,
558
+ "step": 13760
559
+ },
560
+ {
561
+ "epoch": 43.75,
562
+ "learning_rate": 3.7617554858934167e-07,
563
+ "loss": 1.1272,
564
+ "step": 14000
565
+ },
566
+ {
567
+ "epoch": 44.0,
568
+ "eval_accuracy": 0.48631743549648165,
569
+ "eval_loss": 1.5502736568450928,
570
+ "eval_runtime": 50.8248,
571
+ "eval_samples_per_second": 100.659,
572
+ "eval_steps_per_second": 3.148,
573
+ "step": 14080
574
+ },
575
+ {
576
+ "epoch": 45.0,
577
+ "eval_accuracy": 0.4880766223612197,
578
+ "eval_loss": 1.5476186275482178,
579
+ "eval_runtime": 50.5558,
580
+ "eval_samples_per_second": 101.195,
581
+ "eval_steps_per_second": 3.165,
582
+ "step": 14400
583
+ },
584
+ {
585
+ "epoch": 45.31,
586
+ "learning_rate": 2.821316614420063e-07,
587
+ "loss": 1.1256,
588
+ "step": 14500
589
+ },
590
+ {
591
+ "epoch": 46.0,
592
+ "eval_accuracy": 0.48670836591086786,
593
+ "eval_loss": 1.544900894165039,
594
+ "eval_runtime": 50.2484,
595
+ "eval_samples_per_second": 101.814,
596
+ "eval_steps_per_second": 3.184,
597
+ "step": 14720
598
+ },
599
+ {
600
+ "epoch": 46.88,
601
+ "learning_rate": 1.8808777429467083e-07,
602
+ "loss": 1.1104,
603
+ "step": 15000
604
+ },
605
+ {
606
+ "epoch": 47.0,
607
+ "eval_accuracy": 0.4904222048475371,
608
+ "eval_loss": 1.5414071083068848,
609
+ "eval_runtime": 50.6029,
610
+ "eval_samples_per_second": 101.101,
611
+ "eval_steps_per_second": 3.162,
612
+ "step": 15040
613
+ },
614
+ {
615
+ "epoch": 48.0,
616
+ "eval_accuracy": 0.4880766223612197,
617
+ "eval_loss": 1.542637586593628,
618
+ "eval_runtime": 50.2557,
619
+ "eval_samples_per_second": 101.799,
620
+ "eval_steps_per_second": 3.184,
621
+ "step": 15360
622
+ },
623
+ {
624
+ "epoch": 48.44,
625
+ "learning_rate": 9.404388714733542e-08,
626
+ "loss": 1.1101,
627
+ "step": 15500
628
+ },
629
+ {
630
+ "epoch": 49.0,
631
+ "eval_accuracy": 0.49276778733385457,
632
+ "eval_loss": 1.5390384197235107,
633
+ "eval_runtime": 50.5326,
634
+ "eval_samples_per_second": 101.242,
635
+ "eval_steps_per_second": 3.166,
636
+ "step": 15680
637
+ }
638
+ ],
639
+ "logging_steps": 500,
640
+ "max_steps": 16000,
641
+ "num_train_epochs": 50,
642
+ "save_steps": 500,
643
+ "total_flos": 7.770344157480554e+19,
644
+ "trial_name": null,
645
+ "trial_params": null
646
+ }
checkpoint-15680/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5cd01dd5ed26cea902b390d702900b9a59a1879a09644e120ad3fc63e298e03
3
+ size 4091
config.json CHANGED
@@ -16,23 +16,22 @@
16
  "4": "05",
17
  "5": "06-07",
18
  "6": "08-09",
19
- "7": "08-9",
20
- "8": "10-12",
21
- "9": "13-15",
22
- "10": "16-20",
23
- "11": "21-25",
24
- "12": "26-30",
25
- "13": "31-35",
26
- "14": "36-40",
27
- "15": "41-45",
28
- "16": "46-50",
29
- "17": "51-55",
30
- "18": "56-60",
31
- "19": "61-65",
32
- "20": "66-70",
33
- "21": "71-80",
34
- "22": "81-90",
35
- "23": "90+"
36
  },
37
  "image_size": 224,
38
  "initializer_range": 0.02,
@@ -45,23 +44,22 @@
45
  "05": 4,
46
  "06-07": 5,
47
  "08-09": 6,
48
- "08-9": 7,
49
- "10-12": 8,
50
- "13-15": 9,
51
- "16-20": 10,
52
- "21-25": 11,
53
- "26-30": 12,
54
- "31-35": 13,
55
- "36-40": 14,
56
- "41-45": 15,
57
- "46-50": 16,
58
- "51-55": 17,
59
- "56-60": 18,
60
- "61-65": 19,
61
- "66-70": 20,
62
- "71-80": 21,
63
- "81-90": 22,
64
- "90+": 23
65
  },
66
  "layer_norm_eps": 1e-12,
67
  "model_type": "vit",
 
16
  "4": "05",
17
  "5": "06-07",
18
  "6": "08-09",
19
+ "7": "10-12",
20
+ "8": "13-15",
21
+ "9": "16-20",
22
+ "10": "21-25",
23
+ "11": "26-30",
24
+ "12": "31-35",
25
+ "13": "36-40",
26
+ "14": "41-45",
27
+ "15": "46-50",
28
+ "16": "51-55",
29
+ "17": "56-60",
30
+ "18": "61-65",
31
+ "19": "66-70",
32
+ "20": "71-80",
33
+ "21": "81-90",
34
+ "22": "90+"
 
35
  },
36
  "image_size": 224,
37
  "initializer_range": 0.02,
 
44
  "05": 4,
45
  "06-07": 5,
46
  "08-09": 6,
47
+ "10-12": 7,
48
+ "13-15": 8,
49
+ "16-20": 9,
50
+ "21-25": 10,
51
+ "26-30": 11,
52
+ "31-35": 12,
53
+ "36-40": 13,
54
+ "41-45": 14,
55
+ "46-50": 15,
56
+ "51-55": 16,
57
+ "56-60": 17,
58
+ "61-65": 18,
59
+ "66-70": 19,
60
+ "71-80": 20,
61
+ "81-90": 21,
62
+ "90+": 22
 
63
  },
64
  "layer_norm_eps": 1e-12,
65
  "model_type": "vit",
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a22def0831c81660cca9db86c3bae57d49fe05506dfb7beb2270c5fa8af17134
3
- size 343291648
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a63ea460c98a52e8bcfec2f3c809237b00fc0d2b500c3c22931b40d44716a89
3
+ size 343288572
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b7c9a1b990012db059eeb2765cb4f742d0ae6ffaa2dbd995adacf469ba98c917
3
  size 4091
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5cd01dd5ed26cea902b390d702900b9a59a1879a09644e120ad3fc63e298e03
3
  size 4091