NDSrex committed on
Commit
fd6bc47
1 Parent(s): 178a211

Upload folder using huggingface_hub

Browse files
Files changed (6) hide show
  1. optimizer.pt +1 -1
  2. pytorch_model.bin +1 -1
  3. rng_state.pth +1 -1
  4. scheduler.pt +1 -1
  5. trainer_state.json +82 -289
  6. training_args.bin +1 -1
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ff2d49a2ad2df39c3c1b8ba9be349127ce620615e5d7fd3e5d5121180e011ab5
3
  size 686549637
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:656817bfa6a22aa8943bf0da0421dc07683b885df8cc35d91a24f711f492f8e4
3
  size 686549637
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f754c81e253146cf8f143265ea8a0855432a701853aa6eabfbfe4cb4ec02dc0b
3
  size 343284077
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e8d1e9b12a095f980d3cff70df27da6cf9b455e9897a38989e44d3a3273706f
3
  size 343284077
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8bb3860d98363e44bc4704fb9458deec927fd96997d1d09c80639070db959d22
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7508d4b8dd267de5cc58e972da25236687927651336a28f292c92f7f23951475
3
  size 14575
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5e5c5410236a01c76e00a372a6d100405306d921ea2e7b7cc3083628f1364373
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1104ddaf4c1d9ec238905ad79c16f00386750536dfff0f555acbb7df8c8a8135
3
  size 627
trainer_state.json CHANGED
@@ -1,430 +1,223 @@
1
  {
2
- "best_metric": 0.07279229909181595,
3
- "best_model_checkpoint": "./vit-base-beans/checkpoint-500",
4
- "epoch": 3.6144578313253013,
5
- "global_step": 600,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.06,
12
- "learning_rate": 0.00019698795180722893,
13
- "loss": 1.6355,
14
  "step": 10
15
  },
16
  {
17
  "epoch": 0.12,
18
- "learning_rate": 0.00019397590361445782,
19
- "loss": 1.0697,
20
  "step": 20
21
  },
22
  {
23
  "epoch": 0.18,
24
- "learning_rate": 0.00019096385542168677,
25
- "loss": 0.6398,
26
  "step": 30
27
  },
28
  {
29
  "epoch": 0.24,
30
- "learning_rate": 0.00018795180722891569,
31
- "loss": 0.4757,
32
  "step": 40
33
  },
34
  {
35
  "epoch": 0.3,
36
- "learning_rate": 0.00018493975903614458,
37
- "loss": 0.3393,
38
  "step": 50
39
  },
40
  {
41
  "epoch": 0.36,
42
- "learning_rate": 0.0001819277108433735,
43
- "loss": 0.297,
44
  "step": 60
45
  },
46
  {
47
  "epoch": 0.42,
48
- "learning_rate": 0.00017891566265060242,
49
- "loss": 0.2623,
50
  "step": 70
51
  },
52
  {
53
  "epoch": 0.48,
54
- "learning_rate": 0.00017590361445783134,
55
- "loss": 0.2053,
56
  "step": 80
57
  },
58
  {
59
  "epoch": 0.54,
60
- "learning_rate": 0.00017289156626506026,
61
- "loss": 0.1578,
62
  "step": 90
63
  },
64
  {
65
  "epoch": 0.6,
66
- "learning_rate": 0.00016987951807228917,
67
- "loss": 0.1579,
68
  "step": 100
69
  },
70
  {
71
  "epoch": 0.6,
72
- "eval_accuracy": 0.967479674796748,
73
- "eval_loss": 0.16562165319919586,
74
- "eval_runtime": 3.9225,
75
- "eval_samples_per_second": 62.716,
76
- "eval_steps_per_second": 7.903,
77
  "step": 100
78
  },
79
  {
80
  "epoch": 0.66,
81
- "learning_rate": 0.0001668674698795181,
82
- "loss": 0.1362,
83
  "step": 110
84
  },
85
  {
86
  "epoch": 0.72,
87
- "learning_rate": 0.00016385542168674699,
88
- "loss": 0.1524,
89
  "step": 120
90
  },
91
  {
92
  "epoch": 0.78,
93
- "learning_rate": 0.0001608433734939759,
94
- "loss": 0.0724,
95
  "step": 130
96
  },
97
  {
98
  "epoch": 0.84,
99
- "learning_rate": 0.00015783132530120482,
100
- "loss": 0.2021,
101
  "step": 140
102
  },
103
  {
104
  "epoch": 0.9,
105
- "learning_rate": 0.00015481927710843374,
106
- "loss": 0.0813,
107
  "step": 150
108
  },
109
  {
110
  "epoch": 0.96,
111
- "learning_rate": 0.00015180722891566266,
112
- "loss": 0.0626,
113
  "step": 160
114
  },
115
  {
116
  "epoch": 1.02,
117
- "learning_rate": 0.00014879518072289158,
118
- "loss": 0.0959,
119
  "step": 170
120
  },
121
  {
122
  "epoch": 1.08,
123
- "learning_rate": 0.00014578313253012047,
124
- "loss": 0.0884,
125
  "step": 180
126
  },
127
  {
128
  "epoch": 1.14,
129
- "learning_rate": 0.0001427710843373494,
130
- "loss": 0.0713,
131
  "step": 190
132
  },
133
  {
134
  "epoch": 1.2,
135
- "learning_rate": 0.00013975903614457834,
136
- "loss": 0.0604,
137
  "step": 200
138
  },
139
  {
140
  "epoch": 1.2,
141
- "eval_accuracy": 0.983739837398374,
142
- "eval_loss": 0.11851482838392258,
143
- "eval_runtime": 4.1596,
144
- "eval_samples_per_second": 59.14,
145
- "eval_steps_per_second": 7.453,
146
  "step": 200
147
  },
148
  {
149
  "epoch": 1.27,
150
- "learning_rate": 0.00013674698795180723,
151
- "loss": 0.0534,
152
  "step": 210
153
  },
154
  {
155
  "epoch": 1.33,
156
- "learning_rate": 0.00013373493975903615,
157
- "loss": 0.0292,
158
  "step": 220
159
  },
160
  {
161
  "epoch": 1.39,
162
- "learning_rate": 0.00013072289156626507,
163
- "loss": 0.0395,
164
  "step": 230
165
  },
166
  {
167
  "epoch": 1.45,
168
- "learning_rate": 0.00012771084337349396,
169
- "loss": 0.0325,
170
  "step": 240
171
  },
172
  {
173
  "epoch": 1.51,
174
- "learning_rate": 0.00012469879518072288,
175
- "loss": 0.0298,
176
  "step": 250
177
  },
178
  {
179
  "epoch": 1.57,
180
- "learning_rate": 0.00012168674698795181,
181
- "loss": 0.0211,
182
  "step": 260
183
  },
184
  {
185
  "epoch": 1.63,
186
- "learning_rate": 0.00011867469879518073,
187
- "loss": 0.07,
188
  "step": 270
189
  },
190
  {
191
  "epoch": 1.69,
192
- "learning_rate": 0.00011566265060240964,
193
- "loss": 0.0411,
194
  "step": 280
195
  },
196
  {
197
  "epoch": 1.75,
198
- "learning_rate": 0.00011265060240963856,
199
- "loss": 0.0201,
200
  "step": 290
201
  },
202
  {
203
  "epoch": 1.81,
204
- "learning_rate": 0.00010963855421686749,
205
- "loss": 0.0178,
206
  "step": 300
207
  },
208
  {
209
  "epoch": 1.81,
210
- "eval_accuracy": 0.9634146341463414,
211
- "eval_loss": 0.1352052390575409,
212
- "eval_runtime": 4.1303,
213
- "eval_samples_per_second": 59.559,
214
- "eval_steps_per_second": 7.505,
215
  "step": 300
216
- },
217
- {
218
- "epoch": 1.87,
219
- "learning_rate": 0.00010662650602409638,
220
- "loss": 0.0229,
221
- "step": 310
222
- },
223
- {
224
- "epoch": 1.93,
225
- "learning_rate": 0.0001036144578313253,
226
- "loss": 0.0155,
227
- "step": 320
228
- },
229
- {
230
- "epoch": 1.99,
231
- "learning_rate": 0.00010060240963855423,
232
- "loss": 0.0148,
233
- "step": 330
234
- },
235
- {
236
- "epoch": 2.05,
237
- "learning_rate": 9.759036144578314e-05,
238
- "loss": 0.0156,
239
- "step": 340
240
- },
241
- {
242
- "epoch": 2.11,
243
- "learning_rate": 9.457831325301205e-05,
244
- "loss": 0.0135,
245
- "step": 350
246
- },
247
- {
248
- "epoch": 2.17,
249
- "learning_rate": 9.156626506024096e-05,
250
- "loss": 0.0139,
251
- "step": 360
252
- },
253
- {
254
- "epoch": 2.23,
255
- "learning_rate": 8.855421686746988e-05,
256
- "loss": 0.0229,
257
- "step": 370
258
- },
259
- {
260
- "epoch": 2.29,
261
- "learning_rate": 8.55421686746988e-05,
262
- "loss": 0.0122,
263
- "step": 380
264
- },
265
- {
266
- "epoch": 2.35,
267
- "learning_rate": 8.253012048192772e-05,
268
- "loss": 0.0121,
269
- "step": 390
270
- },
271
- {
272
- "epoch": 2.41,
273
- "learning_rate": 7.951807228915663e-05,
274
- "loss": 0.0112,
275
- "step": 400
276
- },
277
- {
278
- "epoch": 2.41,
279
- "eval_accuracy": 0.975609756097561,
280
- "eval_loss": 0.10706935822963715,
281
- "eval_runtime": 4.0159,
282
- "eval_samples_per_second": 61.257,
283
- "eval_steps_per_second": 7.719,
284
- "step": 400
285
- },
286
- {
287
- "epoch": 2.47,
288
- "learning_rate": 7.650602409638555e-05,
289
- "loss": 0.0269,
290
- "step": 410
291
- },
292
- {
293
- "epoch": 2.53,
294
- "learning_rate": 7.349397590361447e-05,
295
- "loss": 0.0119,
296
- "step": 420
297
- },
298
- {
299
- "epoch": 2.59,
300
- "learning_rate": 7.048192771084337e-05,
301
- "loss": 0.0346,
302
- "step": 430
303
- },
304
- {
305
- "epoch": 2.65,
306
- "learning_rate": 6.746987951807229e-05,
307
- "loss": 0.0104,
308
- "step": 440
309
- },
310
- {
311
- "epoch": 2.71,
312
- "learning_rate": 6.445783132530121e-05,
313
- "loss": 0.0101,
314
- "step": 450
315
- },
316
- {
317
- "epoch": 2.77,
318
- "learning_rate": 6.144578313253012e-05,
319
- "loss": 0.0114,
320
- "step": 460
321
- },
322
- {
323
- "epoch": 2.83,
324
- "learning_rate": 5.843373493975904e-05,
325
- "loss": 0.0098,
326
- "step": 470
327
- },
328
- {
329
- "epoch": 2.89,
330
- "learning_rate": 5.5421686746987955e-05,
331
- "loss": 0.0094,
332
- "step": 480
333
- },
334
- {
335
- "epoch": 2.95,
336
- "learning_rate": 5.240963855421687e-05,
337
- "loss": 0.0099,
338
- "step": 490
339
- },
340
- {
341
- "epoch": 3.01,
342
- "learning_rate": 4.9397590361445786e-05,
343
- "loss": 0.0092,
344
- "step": 500
345
- },
346
- {
347
- "epoch": 3.01,
348
- "eval_accuracy": 0.983739837398374,
349
- "eval_loss": 0.07279229909181595,
350
- "eval_runtime": 4.4168,
351
- "eval_samples_per_second": 55.696,
352
- "eval_steps_per_second": 7.019,
353
- "step": 500
354
- },
355
- {
356
- "epoch": 3.07,
357
- "learning_rate": 4.63855421686747e-05,
358
- "loss": 0.009,
359
- "step": 510
360
- },
361
- {
362
- "epoch": 3.13,
363
- "learning_rate": 4.337349397590362e-05,
364
- "loss": 0.0089,
365
- "step": 520
366
- },
367
- {
368
- "epoch": 3.19,
369
- "learning_rate": 4.036144578313254e-05,
370
- "loss": 0.0087,
371
- "step": 530
372
- },
373
- {
374
- "epoch": 3.25,
375
- "learning_rate": 3.734939759036144e-05,
376
- "loss": 0.0085,
377
- "step": 540
378
- },
379
- {
380
- "epoch": 3.31,
381
- "learning_rate": 3.433734939759036e-05,
382
- "loss": 0.0083,
383
- "step": 550
384
- },
385
- {
386
- "epoch": 3.37,
387
- "learning_rate": 3.132530120481928e-05,
388
- "loss": 0.0082,
389
- "step": 560
390
- },
391
- {
392
- "epoch": 3.43,
393
- "learning_rate": 2.8313253012048197e-05,
394
- "loss": 0.0083,
395
- "step": 570
396
- },
397
- {
398
- "epoch": 3.49,
399
- "learning_rate": 2.530120481927711e-05,
400
- "loss": 0.0317,
401
- "step": 580
402
- },
403
- {
404
- "epoch": 3.55,
405
- "learning_rate": 2.2289156626506025e-05,
406
- "loss": 0.0079,
407
- "step": 590
408
- },
409
- {
410
- "epoch": 3.61,
411
- "learning_rate": 1.927710843373494e-05,
412
- "loss": 0.008,
413
- "step": 600
414
- },
415
- {
416
- "epoch": 3.61,
417
- "eval_accuracy": 0.983739837398374,
418
- "eval_loss": 0.08243442326784134,
419
- "eval_runtime": 3.9885,
420
- "eval_samples_per_second": 61.677,
421
- "eval_steps_per_second": 7.772,
422
- "step": 600
423
  }
424
  ],
425
- "max_steps": 664,
426
- "num_train_epochs": 4,
427
- "total_flos": 7.430264924897526e+17,
428
  "trial_name": null,
429
  "trial_params": null
430
  }
 
1
  {
2
+ "best_metric": 0.12367749214172363,
3
+ "best_model_checkpoint": "./vit-base-beans/checkpoint-300",
4
+ "epoch": 1.8072289156626506,
5
+ "global_step": 300,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.06,
12
+ "learning_rate": 0.00019951807228915663,
13
+ "loss": 1.6515,
14
  "step": 10
15
  },
16
  {
17
  "epoch": 0.12,
18
+ "learning_rate": 0.00019903614457831325,
19
+ "loss": 1.0551,
20
  "step": 20
21
  },
22
  {
23
  "epoch": 0.18,
24
+ "learning_rate": 0.0001985542168674699,
25
+ "loss": 0.6568,
26
  "step": 30
27
  },
28
  {
29
  "epoch": 0.24,
30
+ "learning_rate": 0.00019807228915662652,
31
+ "loss": 0.4811,
32
  "step": 40
33
  },
34
  {
35
  "epoch": 0.3,
36
+ "learning_rate": 0.00019759036144578314,
37
+ "loss": 0.3576,
38
  "step": 50
39
  },
40
  {
41
  "epoch": 0.36,
42
+ "learning_rate": 0.00019710843373493977,
43
+ "loss": 0.2787,
44
  "step": 60
45
  },
46
  {
47
  "epoch": 0.42,
48
+ "learning_rate": 0.00019662650602409642,
49
+ "loss": 0.233,
50
  "step": 70
51
  },
52
  {
53
  "epoch": 0.48,
54
+ "learning_rate": 0.000196144578313253,
55
+ "loss": 0.1287,
56
  "step": 80
57
  },
58
  {
59
  "epoch": 0.54,
60
+ "learning_rate": 0.00019566265060240966,
61
+ "loss": 0.1755,
62
  "step": 90
63
  },
64
  {
65
  "epoch": 0.6,
66
+ "learning_rate": 0.00019518072289156628,
67
+ "loss": 0.109,
68
  "step": 100
69
  },
70
  {
71
  "epoch": 0.6,
72
+ "eval_accuracy": 0.959349593495935,
73
+ "eval_loss": 0.16569873690605164,
74
+ "eval_runtime": 4.541,
75
+ "eval_samples_per_second": 54.173,
76
+ "eval_steps_per_second": 6.827,
77
  "step": 100
78
  },
79
  {
80
  "epoch": 0.66,
81
+ "learning_rate": 0.0001946987951807229,
82
+ "loss": 0.1129,
83
  "step": 110
84
  },
85
  {
86
  "epoch": 0.72,
87
+ "learning_rate": 0.00019421686746987952,
88
+ "loss": 0.1769,
89
  "step": 120
90
  },
91
  {
92
  "epoch": 0.78,
93
+ "learning_rate": 0.00019373493975903617,
94
+ "loss": 0.1511,
95
  "step": 130
96
  },
97
  {
98
  "epoch": 0.84,
99
+ "learning_rate": 0.00019325301204819277,
100
+ "loss": 0.2121,
101
  "step": 140
102
  },
103
  {
104
  "epoch": 0.9,
105
+ "learning_rate": 0.00019277108433734942,
106
+ "loss": 0.135,
107
  "step": 150
108
  },
109
  {
110
  "epoch": 0.96,
111
+ "learning_rate": 0.00019228915662650604,
112
+ "loss": 0.1046,
113
  "step": 160
114
  },
115
  {
116
  "epoch": 1.02,
117
+ "learning_rate": 0.00019180722891566266,
118
+ "loss": 0.0742,
119
  "step": 170
120
  },
121
  {
122
  "epoch": 1.08,
123
+ "learning_rate": 0.00019132530120481928,
124
+ "loss": 0.0655,
125
  "step": 180
126
  },
127
  {
128
  "epoch": 1.14,
129
+ "learning_rate": 0.00019084337349397593,
130
+ "loss": 0.0735,
131
  "step": 190
132
  },
133
  {
134
  "epoch": 1.2,
135
+ "learning_rate": 0.00019036144578313252,
136
+ "loss": 0.0599,
137
  "step": 200
138
  },
139
  {
140
  "epoch": 1.2,
141
+ "eval_accuracy": 0.9471544715447154,
142
+ "eval_loss": 0.16865180432796478,
143
+ "eval_runtime": 4.123,
144
+ "eval_samples_per_second": 59.665,
145
+ "eval_steps_per_second": 7.519,
146
  "step": 200
147
  },
148
  {
149
  "epoch": 1.27,
150
+ "learning_rate": 0.00018987951807228917,
151
+ "loss": 0.1547,
152
  "step": 210
153
  },
154
  {
155
  "epoch": 1.33,
156
+ "learning_rate": 0.0001893975903614458,
157
+ "loss": 0.0723,
158
  "step": 220
159
  },
160
  {
161
  "epoch": 1.39,
162
+ "learning_rate": 0.00018891566265060242,
163
+ "loss": 0.045,
164
  "step": 230
165
  },
166
  {
167
  "epoch": 1.45,
168
+ "learning_rate": 0.00018843373493975904,
169
+ "loss": 0.0339,
170
  "step": 240
171
  },
172
  {
173
  "epoch": 1.51,
174
+ "learning_rate": 0.00018795180722891569,
175
+ "loss": 0.0424,
176
  "step": 250
177
  },
178
  {
179
  "epoch": 1.57,
180
+ "learning_rate": 0.00018746987951807228,
181
+ "loss": 0.1061,
182
  "step": 260
183
  },
184
  {
185
  "epoch": 1.63,
186
+ "learning_rate": 0.00018698795180722893,
187
+ "loss": 0.0557,
188
  "step": 270
189
  },
190
  {
191
  "epoch": 1.69,
192
+ "learning_rate": 0.00018650602409638555,
193
+ "loss": 0.0491,
194
  "step": 280
195
  },
196
  {
197
  "epoch": 1.75,
198
+ "learning_rate": 0.00018602409638554217,
199
+ "loss": 0.0489,
200
  "step": 290
201
  },
202
  {
203
  "epoch": 1.81,
204
+ "learning_rate": 0.0001855421686746988,
205
+ "loss": 0.0387,
206
  "step": 300
207
  },
208
  {
209
  "epoch": 1.81,
210
+ "eval_accuracy": 0.9715447154471545,
211
+ "eval_loss": 0.12367749214172363,
212
+ "eval_runtime": 4.2743,
213
+ "eval_samples_per_second": 57.553,
214
+ "eval_steps_per_second": 7.253,
215
  "step": 300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
216
  }
217
  ],
218
+ "max_steps": 4150,
219
+ "num_train_epochs": 25,
220
+ "total_flos": 3.716682371694674e+17,
221
  "trial_name": null,
222
  "trial_params": null
223
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d8366d37c75d8f34e90410171cc6618ba8f6df896670df7c5bdb12c3704ca209
3
  size 3899
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33304c35f757f7a8769397f0990b45525c35082ccef0e18edbe695db423c6d46
3
  size 3899