Augusto777 committed on
Commit
f38dfb8
·
verified ·
1 Parent(s): 55597f6

End of training

Browse files
README.md CHANGED
@@ -17,12 +17,12 @@ model-index:
17
  name: imagefolder
18
  type: imagefolder
19
  config: default
20
- split: validation
21
  args: default
22
  metrics:
23
  - name: Accuracy
24
  type: accuracy
25
- value: 0.8387096774193549
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -32,8 +32,8 @@ should probably proofread and complete it, then remove this comment. -->
32
 
33
  This model is a fine-tuned version of [microsoft/beit-base-patch16-224](https://huggingface.co/microsoft/beit-base-patch16-224) on the imagefolder dataset.
34
  It achieves the following results on the evaluation set:
35
- - Loss: 0.5389
36
- - Accuracy: 0.8387
37
 
38
  ## Model description
39
 
@@ -52,7 +52,7 @@ More information needed
52
  ### Training hyperparameters
53
 
54
  The following hyperparameters were used during training:
55
- - learning_rate: 3.8e-05
56
  - train_batch_size: 16
57
  - eval_batch_size: 16
58
  - seed: 42
@@ -67,43 +67,43 @@ The following hyperparameters were used during training:
67
 
68
  | Training Loss | Epoch | Step | Validation Loss | Accuracy |
69
  |:-------------:|:-----:|:----:|:---------------:|:--------:|
70
- | No log | 0.91 | 5 | 1.7603 | 0.1452 |
71
- | 1.7693 | 2.0 | 11 | 1.6916 | 0.1452 |
72
- | 1.7693 | 2.91 | 16 | 1.5752 | 0.1452 |
73
- | 1.6261 | 4.0 | 22 | 1.4015 | 0.1452 |
74
- | 1.6261 | 4.91 | 27 | 1.2890 | 0.1452 |
75
- | 1.3534 | 6.0 | 33 | 1.2128 | 0.3710 |
76
- | 1.3534 | 6.91 | 38 | 1.1418 | 0.4032 |
77
- | 1.1661 | 8.0 | 44 | 1.0727 | 0.4677 |
78
- | 1.1661 | 8.91 | 49 | 1.0909 | 0.4032 |
79
- | 1.0344 | 10.0 | 55 | 0.9719 | 0.6129 |
80
- | 0.9604 | 10.91 | 60 | 0.9923 | 0.6452 |
81
- | 0.9604 | 12.0 | 66 | 0.9554 | 0.6290 |
82
- | 0.8477 | 12.91 | 71 | 0.9156 | 0.6774 |
83
- | 0.8477 | 14.0 | 77 | 0.8339 | 0.7097 |
84
- | 0.7727 | 14.91 | 82 | 0.7851 | 0.7258 |
85
- | 0.7727 | 16.0 | 88 | 0.7994 | 0.7258 |
86
- | 0.6714 | 16.91 | 93 | 0.8246 | 0.6290 |
87
- | 0.6714 | 18.0 | 99 | 0.7389 | 0.7097 |
88
- | 0.6143 | 18.91 | 104 | 0.8202 | 0.6452 |
89
- | 0.5398 | 20.0 | 110 | 0.6295 | 0.7742 |
90
- | 0.5398 | 20.91 | 115 | 0.6736 | 0.7581 |
91
- | 0.4958 | 22.0 | 121 | 0.6218 | 0.7903 |
92
- | 0.4958 | 22.91 | 126 | 0.6401 | 0.7742 |
93
- | 0.4561 | 24.0 | 132 | 0.6640 | 0.7258 |
94
- | 0.4561 | 24.91 | 137 | 0.6009 | 0.7742 |
95
- | 0.4149 | 26.0 | 143 | 0.5619 | 0.8065 |
96
- | 0.4149 | 26.91 | 148 | 0.5118 | 0.8065 |
97
- | 0.3965 | 28.0 | 154 | 0.5682 | 0.8065 |
98
- | 0.3965 | 28.91 | 159 | 0.5515 | 0.8065 |
99
- | 0.4219 | 30.0 | 165 | 0.7045 | 0.7097 |
100
- | 0.3939 | 30.91 | 170 | 0.5712 | 0.8065 |
101
- | 0.3939 | 32.0 | 176 | 0.5857 | 0.8065 |
102
- | 0.3598 | 32.91 | 181 | 0.5539 | 0.8065 |
103
- | 0.3598 | 34.0 | 187 | 0.5471 | 0.8226 |
104
- | 0.3613 | 34.91 | 192 | 0.5408 | 0.8226 |
105
- | 0.3613 | 36.0 | 198 | 0.5389 | 0.8387 |
106
- | 0.3748 | 36.36 | 200 | 0.5390 | 0.8387 |
107
 
108
 
109
  ### Framework versions
 
17
  name: imagefolder
18
  type: imagefolder
19
  config: default
20
+ split: train
21
  args: default
22
  metrics:
23
  - name: Accuracy
24
  type: accuracy
25
+ value: 0.9032258064516129
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
32
 
33
  This model is a fine-tuned version of [microsoft/beit-base-patch16-224](https://huggingface.co/microsoft/beit-base-patch16-224) on the imagefolder dataset.
34
  It achieves the following results on the evaluation set:
35
+ - Loss: 0.4274
36
+ - Accuracy: 0.9032
37
 
38
  ## Model description
39
 
 
52
  ### Training hyperparameters
53
 
54
  The following hyperparameters were used during training:
55
+ - learning_rate: 5e-05
56
  - train_batch_size: 16
57
  - eval_batch_size: 16
58
  - seed: 42
 
67
 
68
  | Training Loss | Epoch | Step | Validation Loss | Accuracy |
69
  |:-------------:|:-----:|:----:|:---------------:|:--------:|
70
+ | No log | 0.91 | 5 | 1.7093 | 0.1774 |
71
+ | 1.7744 | 2.0 | 11 | 1.6178 | 0.1774 |
72
+ | 1.7744 | 2.91 | 16 | 1.4730 | 0.1774 |
73
+ | 1.5823 | 4.0 | 22 | 1.2754 | 0.1774 |
74
+ | 1.5823 | 4.91 | 27 | 1.1455 | 0.5645 |
75
+ | 1.27 | 6.0 | 33 | 1.0147 | 0.6290 |
76
+ | 1.27 | 6.91 | 38 | 0.9790 | 0.5484 |
77
+ | 1.079 | 8.0 | 44 | 1.0474 | 0.4516 |
78
+ | 1.079 | 8.91 | 49 | 0.8796 | 0.7581 |
79
+ | 1.005 | 10.0 | 55 | 0.7759 | 0.7742 |
80
+ | 0.8479 | 10.91 | 60 | 0.7421 | 0.8226 |
81
+ | 0.8479 | 12.0 | 66 | 0.6760 | 0.8548 |
82
+ | 0.7695 | 12.91 | 71 | 0.5933 | 0.8387 |
83
+ | 0.7695 | 14.0 | 77 | 0.6372 | 0.7742 |
84
+ | 0.6591 | 14.91 | 82 | 0.5653 | 0.8387 |
85
+ | 0.6591 | 16.0 | 88 | 0.4950 | 0.8710 |
86
+ | 0.5675 | 16.91 | 93 | 0.5040 | 0.8226 |
87
+ | 0.5675 | 18.0 | 99 | 0.4274 | 0.9032 |
88
+ | 0.5134 | 18.91 | 104 | 0.4617 | 0.8548 |
89
+ | 0.4418 | 20.0 | 110 | 0.4245 | 0.8871 |
90
+ | 0.4418 | 20.91 | 115 | 0.4922 | 0.8387 |
91
+ | 0.402 | 22.0 | 121 | 0.5112 | 0.8226 |
92
+ | 0.402 | 22.91 | 126 | 0.4696 | 0.8548 |
93
+ | 0.4039 | 24.0 | 132 | 0.4014 | 0.8710 |
94
+ | 0.4039 | 24.91 | 137 | 0.5006 | 0.8226 |
95
+ | 0.4216 | 26.0 | 143 | 0.5351 | 0.8548 |
96
+ | 0.4216 | 26.91 | 148 | 0.5203 | 0.8548 |
97
+ | 0.3593 | 28.0 | 154 | 0.4082 | 0.8548 |
98
+ | 0.3593 | 28.91 | 159 | 0.4017 | 0.8710 |
99
+ | 0.3638 | 30.0 | 165 | 0.4068 | 0.8871 |
100
+ | 0.3509 | 30.91 | 170 | 0.3991 | 0.8871 |
101
+ | 0.3509 | 32.0 | 176 | 0.3965 | 0.8710 |
102
+ | 0.3426 | 32.91 | 181 | 0.3921 | 0.8710 |
103
+ | 0.3426 | 34.0 | 187 | 0.3998 | 0.8710 |
104
+ | 0.3253 | 34.91 | 192 | 0.4102 | 0.8871 |
105
+ | 0.3253 | 36.0 | 198 | 0.4081 | 0.8871 |
106
+ | 0.3085 | 36.36 | 200 | 0.4083 | 0.8871 |
107
 
108
 
109
  ### Framework versions
all_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
  "epoch": 36.36,
3
- "eval_accuracy": 0.8387096774193549,
4
- "eval_loss": 0.5388934016227722,
5
- "eval_runtime": 2.3395,
6
- "eval_samples_per_second": 26.501,
7
- "eval_steps_per_second": 1.71,
8
- "train_loss": 0.7515219366550445,
9
- "train_runtime": 697.4947,
10
- "train_samples_per_second": 20.072,
11
- "train_steps_per_second": 0.287
12
  }
 
1
  {
2
  "epoch": 36.36,
3
+ "eval_accuracy": 0.9032258064516129,
4
+ "eval_loss": 0.4273931682109833,
5
+ "eval_runtime": 2.4718,
6
+ "eval_samples_per_second": 25.083,
7
+ "eval_steps_per_second": 1.618,
8
+ "train_loss": 0.6893912732601166,
9
+ "train_runtime": 709.0628,
10
+ "train_samples_per_second": 19.744,
11
+ "train_steps_per_second": 0.282
12
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 36.36,
3
- "eval_accuracy": 0.8387096774193549,
4
- "eval_loss": 0.5388934016227722,
5
- "eval_runtime": 2.3395,
6
- "eval_samples_per_second": 26.501,
7
- "eval_steps_per_second": 1.71
8
  }
 
1
  {
2
  "epoch": 36.36,
3
+ "eval_accuracy": 0.9032258064516129,
4
+ "eval_loss": 0.4273931682109833,
5
+ "eval_runtime": 2.4718,
6
+ "eval_samples_per_second": 25.083,
7
+ "eval_steps_per_second": 1.618
8
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:59924b060910eddb739a9254c885be73dcdec77373c5fa406b5d448ab0ce9573
3
  size 343086480
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d96908ad48e4d7c197f6f3197c8f8e7fa17c3e3387440d03677a0382a863f75
3
  size 343086480
runs/Oct11_17-14-38_DESKTOP-SKBE9FB/events.out.tfevents.1728688479.DESKTOP-SKBE9FB.10424.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5f1edd262d1cad2045fec2fc53ab4f9e4a38b18f4e3b5ef65738e1076a6887e
3
+ size 20604
runs/Oct11_17-14-38_DESKTOP-SKBE9FB/events.out.tfevents.1728689179.DESKTOP-SKBE9FB.10424.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69e9e033122585d2c4755c85276ecb8c1151e21ffb8335db8ab14af9ff167f43
3
+ size 411
runs/Oct11_17-28-45_DESKTOP-SKBE9FB/events.out.tfevents.1728689327.DESKTOP-SKBE9FB.15824.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7572a843c89ad80a11666ae3032c7bbfdc0b847c9da2496fcae4ab06890dcb7d
3
+ size 6909
runs/Oct11_17-30-54_DESKTOP-SKBE9FB/events.out.tfevents.1728689455.DESKTOP-SKBE9FB.9024.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2bb5741a9bcd3606a3c061a02230595b52f382c9383a79bb712bce3eb54451d5
3
+ size 20604
runs/Oct11_17-30-54_DESKTOP-SKBE9FB/events.out.tfevents.1728690167.DESKTOP-SKBE9FB.9024.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc0f93adcf4af56501d2cb9befeb4c41b5bc602e95448ab9fcab16efd6528b83
3
+ size 411
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "epoch": 36.36,
3
- "train_loss": 0.7515219366550445,
4
- "train_runtime": 697.4947,
5
- "train_samples_per_second": 20.072,
6
- "train_steps_per_second": 0.287
7
  }
 
1
  {
2
  "epoch": 36.36,
3
+ "train_loss": 0.6893912732601166,
4
+ "train_runtime": 709.0628,
5
+ "train_samples_per_second": 19.744,
6
+ "train_steps_per_second": 0.282
7
  }
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 0.8387096774193549,
3
- "best_model_checkpoint": "beit-base-patch16-224-OT-alt\\checkpoint-198",
4
  "epoch": 36.36363636363637,
5
  "eval_steps": 500,
6
  "global_step": 200,
@@ -10,465 +10,465 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.91,
13
- "eval_accuracy": 0.14516129032258066,
14
- "eval_loss": 1.7602910995483398,
15
- "eval_runtime": 2.2295,
16
- "eval_samples_per_second": 27.809,
17
- "eval_steps_per_second": 1.794,
18
  "step": 5
19
  },
20
  {
21
  "epoch": 1.82,
22
- "learning_rate": 1.9e-05,
23
- "loss": 1.7693,
24
  "step": 10
25
  },
26
  {
27
  "epoch": 2.0,
28
- "eval_accuracy": 0.14516129032258066,
29
- "eval_loss": 1.6916192770004272,
30
- "eval_runtime": 2.248,
31
- "eval_samples_per_second": 27.579,
32
- "eval_steps_per_second": 1.779,
33
  "step": 11
34
  },
35
  {
36
  "epoch": 2.91,
37
- "eval_accuracy": 0.14516129032258066,
38
- "eval_loss": 1.5752112865447998,
39
- "eval_runtime": 2.2645,
40
- "eval_samples_per_second": 27.379,
41
- "eval_steps_per_second": 1.766,
42
  "step": 16
43
  },
44
  {
45
  "epoch": 3.64,
46
- "learning_rate": 3.8e-05,
47
- "loss": 1.6261,
48
  "step": 20
49
  },
50
  {
51
  "epoch": 4.0,
52
- "eval_accuracy": 0.14516129032258066,
53
- "eval_loss": 1.4015283584594727,
54
- "eval_runtime": 2.295,
55
- "eval_samples_per_second": 27.015,
56
- "eval_steps_per_second": 1.743,
57
  "step": 22
58
  },
59
  {
60
  "epoch": 4.91,
61
- "eval_accuracy": 0.14516129032258066,
62
- "eval_loss": 1.2889776229858398,
63
- "eval_runtime": 2.279,
64
- "eval_samples_per_second": 27.205,
65
- "eval_steps_per_second": 1.755,
66
  "step": 27
67
  },
68
  {
69
  "epoch": 5.45,
70
- "learning_rate": 3.588888888888889e-05,
71
- "loss": 1.3534,
72
  "step": 30
73
  },
74
  {
75
  "epoch": 6.0,
76
- "eval_accuracy": 0.3709677419354839,
77
- "eval_loss": 1.2128400802612305,
78
- "eval_runtime": 2.3157,
79
- "eval_samples_per_second": 26.774,
80
- "eval_steps_per_second": 1.727,
81
  "step": 33
82
  },
83
  {
84
  "epoch": 6.91,
85
- "eval_accuracy": 0.4032258064516129,
86
- "eval_loss": 1.1418391466140747,
87
- "eval_runtime": 2.2485,
88
- "eval_samples_per_second": 27.573,
89
- "eval_steps_per_second": 1.779,
90
  "step": 38
91
  },
92
  {
93
  "epoch": 7.27,
94
- "learning_rate": 3.3777777777777776e-05,
95
- "loss": 1.1661,
96
  "step": 40
97
  },
98
  {
99
  "epoch": 8.0,
100
- "eval_accuracy": 0.46774193548387094,
101
- "eval_loss": 1.0726869106292725,
102
- "eval_runtime": 2.247,
103
- "eval_samples_per_second": 27.592,
104
- "eval_steps_per_second": 1.78,
105
  "step": 44
106
  },
107
  {
108
  "epoch": 8.91,
109
- "eval_accuracy": 0.4032258064516129,
110
- "eval_loss": 1.0908629894256592,
111
- "eval_runtime": 2.288,
112
- "eval_samples_per_second": 27.097,
113
- "eval_steps_per_second": 1.748,
114
  "step": 49
115
  },
116
  {
117
  "epoch": 9.09,
118
- "learning_rate": 3.166666666666667e-05,
119
- "loss": 1.0344,
120
  "step": 50
121
  },
122
  {
123
  "epoch": 10.0,
124
- "eval_accuracy": 0.6129032258064516,
125
- "eval_loss": 0.9719263911247253,
126
- "eval_runtime": 2.2685,
127
- "eval_samples_per_second": 27.33,
128
- "eval_steps_per_second": 1.763,
129
  "step": 55
130
  },
131
  {
132
  "epoch": 10.91,
133
- "learning_rate": 2.9555555555555556e-05,
134
- "loss": 0.9604,
135
  "step": 60
136
  },
137
  {
138
  "epoch": 10.91,
139
- "eval_accuracy": 0.6451612903225806,
140
- "eval_loss": 0.9922870397567749,
141
- "eval_runtime": 2.249,
142
- "eval_samples_per_second": 27.567,
143
- "eval_steps_per_second": 1.779,
144
  "step": 60
145
  },
146
  {
147
  "epoch": 12.0,
148
- "eval_accuracy": 0.6290322580645161,
149
- "eval_loss": 0.9554464221000671,
150
- "eval_runtime": 2.3255,
151
- "eval_samples_per_second": 26.661,
152
- "eval_steps_per_second": 1.72,
153
  "step": 66
154
  },
155
  {
156
  "epoch": 12.73,
157
- "learning_rate": 2.7444444444444447e-05,
158
- "loss": 0.8477,
159
  "step": 70
160
  },
161
  {
162
  "epoch": 12.91,
163
- "eval_accuracy": 0.6774193548387096,
164
- "eval_loss": 0.9155737161636353,
165
- "eval_runtime": 2.259,
166
- "eval_samples_per_second": 27.445,
167
- "eval_steps_per_second": 1.771,
168
  "step": 71
169
  },
170
  {
171
  "epoch": 14.0,
172
- "eval_accuracy": 0.7096774193548387,
173
- "eval_loss": 0.833889901638031,
174
- "eval_runtime": 2.3145,
175
- "eval_samples_per_second": 26.788,
176
- "eval_steps_per_second": 1.728,
177
  "step": 77
178
  },
179
  {
180
  "epoch": 14.55,
181
- "learning_rate": 2.5333333333333334e-05,
182
- "loss": 0.7727,
183
  "step": 80
184
  },
185
  {
186
  "epoch": 14.91,
187
- "eval_accuracy": 0.7258064516129032,
188
- "eval_loss": 0.7850545048713684,
189
- "eval_runtime": 2.2951,
190
- "eval_samples_per_second": 27.015,
191
- "eval_steps_per_second": 1.743,
192
  "step": 82
193
  },
194
  {
195
  "epoch": 16.0,
196
- "eval_accuracy": 0.7258064516129032,
197
- "eval_loss": 0.7993963956832886,
198
- "eval_runtime": 2.2405,
199
- "eval_samples_per_second": 27.672,
200
- "eval_steps_per_second": 1.785,
201
  "step": 88
202
  },
203
  {
204
  "epoch": 16.36,
205
- "learning_rate": 2.3222222222222224e-05,
206
- "loss": 0.6714,
207
  "step": 90
208
  },
209
  {
210
  "epoch": 16.91,
211
- "eval_accuracy": 0.6290322580645161,
212
- "eval_loss": 0.8245651721954346,
213
- "eval_runtime": 2.1735,
214
- "eval_samples_per_second": 28.525,
215
- "eval_steps_per_second": 1.84,
216
  "step": 93
217
  },
218
  {
219
  "epoch": 18.0,
220
- "eval_accuracy": 0.7096774193548387,
221
- "eval_loss": 0.738938570022583,
222
- "eval_runtime": 2.2195,
223
- "eval_samples_per_second": 27.934,
224
- "eval_steps_per_second": 1.802,
225
  "step": 99
226
  },
227
  {
228
  "epoch": 18.18,
229
- "learning_rate": 2.1111111111111114e-05,
230
- "loss": 0.6143,
231
  "step": 100
232
  },
233
  {
234
  "epoch": 18.91,
235
- "eval_accuracy": 0.6451612903225806,
236
- "eval_loss": 0.8202147483825684,
237
- "eval_runtime": 2.2165,
238
- "eval_samples_per_second": 27.972,
239
- "eval_steps_per_second": 1.805,
240
  "step": 104
241
  },
242
  {
243
  "epoch": 20.0,
244
- "learning_rate": 1.9e-05,
245
- "loss": 0.5398,
246
  "step": 110
247
  },
248
  {
249
  "epoch": 20.0,
250
- "eval_accuracy": 0.7741935483870968,
251
- "eval_loss": 0.6295467615127563,
252
- "eval_runtime": 2.241,
253
- "eval_samples_per_second": 27.666,
254
- "eval_steps_per_second": 1.785,
255
  "step": 110
256
  },
257
  {
258
  "epoch": 20.91,
259
- "eval_accuracy": 0.7580645161290323,
260
- "eval_loss": 0.6735957264900208,
261
- "eval_runtime": 2.3026,
262
- "eval_samples_per_second": 26.926,
263
- "eval_steps_per_second": 1.737,
264
  "step": 115
265
  },
266
  {
267
  "epoch": 21.82,
268
- "learning_rate": 1.6888888888888888e-05,
269
- "loss": 0.4958,
270
  "step": 120
271
  },
272
  {
273
  "epoch": 22.0,
274
- "eval_accuracy": 0.7903225806451613,
275
- "eval_loss": 0.6217902302742004,
276
- "eval_runtime": 2.2946,
277
- "eval_samples_per_second": 27.02,
278
- "eval_steps_per_second": 1.743,
279
  "step": 121
280
  },
281
  {
282
  "epoch": 22.91,
283
- "eval_accuracy": 0.7741935483870968,
284
- "eval_loss": 0.6400670409202576,
285
- "eval_runtime": 2.217,
286
- "eval_samples_per_second": 27.965,
287
- "eval_steps_per_second": 1.804,
288
  "step": 126
289
  },
290
  {
291
  "epoch": 23.64,
292
- "learning_rate": 1.4777777777777778e-05,
293
- "loss": 0.4561,
294
  "step": 130
295
  },
296
  {
297
  "epoch": 24.0,
298
- "eval_accuracy": 0.7258064516129032,
299
- "eval_loss": 0.6640270352363586,
300
- "eval_runtime": 2.199,
301
- "eval_samples_per_second": 28.194,
302
- "eval_steps_per_second": 1.819,
303
  "step": 132
304
  },
305
  {
306
  "epoch": 24.91,
307
- "eval_accuracy": 0.7741935483870968,
308
- "eval_loss": 0.6009158492088318,
309
- "eval_runtime": 2.2165,
310
- "eval_samples_per_second": 27.972,
311
- "eval_steps_per_second": 1.805,
312
  "step": 137
313
  },
314
  {
315
  "epoch": 25.45,
316
- "learning_rate": 1.2666666666666667e-05,
317
- "loss": 0.4149,
318
  "step": 140
319
  },
320
  {
321
  "epoch": 26.0,
322
- "eval_accuracy": 0.8064516129032258,
323
- "eval_loss": 0.5619057416915894,
324
- "eval_runtime": 2.2799,
325
- "eval_samples_per_second": 27.194,
326
- "eval_steps_per_second": 1.754,
327
  "step": 143
328
  },
329
  {
330
  "epoch": 26.91,
331
- "eval_accuracy": 0.8064516129032258,
332
- "eval_loss": 0.5118472576141357,
333
- "eval_runtime": 2.1675,
334
- "eval_samples_per_second": 28.604,
335
- "eval_steps_per_second": 1.845,
336
  "step": 148
337
  },
338
  {
339
  "epoch": 27.27,
340
- "learning_rate": 1.0555555555555557e-05,
341
- "loss": 0.3965,
342
  "step": 150
343
  },
344
  {
345
  "epoch": 28.0,
346
- "eval_accuracy": 0.8064516129032258,
347
- "eval_loss": 0.5682212114334106,
348
- "eval_runtime": 2.2446,
349
- "eval_samples_per_second": 27.622,
350
- "eval_steps_per_second": 1.782,
351
  "step": 154
352
  },
353
  {
354
  "epoch": 28.91,
355
- "eval_accuracy": 0.8064516129032258,
356
- "eval_loss": 0.5514592528343201,
357
- "eval_runtime": 2.2005,
358
- "eval_samples_per_second": 28.175,
359
- "eval_steps_per_second": 1.818,
360
  "step": 159
361
  },
362
  {
363
  "epoch": 29.09,
364
- "learning_rate": 8.444444444444444e-06,
365
- "loss": 0.4219,
366
  "step": 160
367
  },
368
  {
369
  "epoch": 30.0,
370
- "eval_accuracy": 0.7096774193548387,
371
- "eval_loss": 0.7044903039932251,
372
- "eval_runtime": 2.2835,
373
- "eval_samples_per_second": 27.151,
374
- "eval_steps_per_second": 1.752,
375
  "step": 165
376
  },
377
  {
378
  "epoch": 30.91,
379
- "learning_rate": 6.333333333333333e-06,
380
- "loss": 0.3939,
381
  "step": 170
382
  },
383
  {
384
  "epoch": 30.91,
385
- "eval_accuracy": 0.8064516129032258,
386
- "eval_loss": 0.5711750984191895,
387
- "eval_runtime": 2.2085,
388
- "eval_samples_per_second": 28.073,
389
- "eval_steps_per_second": 1.811,
390
  "step": 170
391
  },
392
  {
393
  "epoch": 32.0,
394
- "eval_accuracy": 0.8064516129032258,
395
- "eval_loss": 0.5857284069061279,
396
- "eval_runtime": 2.3796,
397
- "eval_samples_per_second": 26.055,
398
  "eval_steps_per_second": 1.681,
399
  "step": 176
400
  },
401
  {
402
  "epoch": 32.73,
403
- "learning_rate": 4.222222222222222e-06,
404
- "loss": 0.3598,
405
  "step": 180
406
  },
407
  {
408
  "epoch": 32.91,
409
- "eval_accuracy": 0.8064516129032258,
410
- "eval_loss": 0.5539482235908508,
411
- "eval_runtime": 2.3201,
412
- "eval_samples_per_second": 26.724,
413
- "eval_steps_per_second": 1.724,
414
  "step": 181
415
  },
416
  {
417
  "epoch": 34.0,
418
- "eval_accuracy": 0.8225806451612904,
419
- "eval_loss": 0.5470647215843201,
420
- "eval_runtime": 2.1645,
421
- "eval_samples_per_second": 28.644,
422
- "eval_steps_per_second": 1.848,
423
  "step": 187
424
  },
425
  {
426
  "epoch": 34.55,
427
- "learning_rate": 2.111111111111111e-06,
428
- "loss": 0.3613,
429
  "step": 190
430
  },
431
  {
432
  "epoch": 34.91,
433
- "eval_accuracy": 0.8225806451612904,
434
- "eval_loss": 0.5407957434654236,
435
- "eval_runtime": 2.233,
436
- "eval_samples_per_second": 27.765,
437
- "eval_steps_per_second": 1.791,
438
  "step": 192
439
  },
440
  {
441
  "epoch": 36.0,
442
- "eval_accuracy": 0.8387096774193549,
443
- "eval_loss": 0.5388934016227722,
444
- "eval_runtime": 2.3576,
445
- "eval_samples_per_second": 26.298,
446
- "eval_steps_per_second": 1.697,
447
  "step": 198
448
  },
449
  {
450
  "epoch": 36.36,
451
  "learning_rate": 0.0,
452
- "loss": 0.3748,
453
  "step": 200
454
  },
455
  {
456
  "epoch": 36.36,
457
- "eval_accuracy": 0.8387096774193549,
458
- "eval_loss": 0.538964569568634,
459
- "eval_runtime": 2.198,
460
- "eval_samples_per_second": 28.207,
461
- "eval_steps_per_second": 1.82,
462
  "step": 200
463
  },
464
  {
465
  "epoch": 36.36,
466
  "step": 200,
467
  "total_flos": 9.859141601338982e+17,
468
- "train_loss": 0.7515219366550445,
469
- "train_runtime": 697.4947,
470
- "train_samples_per_second": 20.072,
471
- "train_steps_per_second": 0.287
472
  }
473
  ],
474
  "logging_steps": 10,
 
1
  {
2
+ "best_metric": 0.9032258064516129,
3
+ "best_model_checkpoint": "beit-base-patch16-224-OT-alt\\checkpoint-99",
4
  "epoch": 36.36363636363637,
5
  "eval_steps": 500,
6
  "global_step": 200,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.91,
13
+ "eval_accuracy": 0.1774193548387097,
14
+ "eval_loss": 1.7093149423599243,
15
+ "eval_runtime": 2.3415,
16
+ "eval_samples_per_second": 26.478,
17
+ "eval_steps_per_second": 1.708,
18
  "step": 5
19
  },
20
  {
21
  "epoch": 1.82,
22
+ "learning_rate": 2.5e-05,
23
+ "loss": 1.7744,
24
  "step": 10
25
  },
26
  {
27
  "epoch": 2.0,
28
+ "eval_accuracy": 0.1774193548387097,
29
+ "eval_loss": 1.6177618503570557,
30
+ "eval_runtime": 2.3771,
31
+ "eval_samples_per_second": 26.083,
32
+ "eval_steps_per_second": 1.683,
33
  "step": 11
34
  },
35
  {
36
  "epoch": 2.91,
37
+ "eval_accuracy": 0.1774193548387097,
38
+ "eval_loss": 1.473004698753357,
39
+ "eval_runtime": 2.3411,
40
+ "eval_samples_per_second": 26.484,
41
+ "eval_steps_per_second": 1.709,
42
  "step": 16
43
  },
44
  {
45
  "epoch": 3.64,
46
+ "learning_rate": 5e-05,
47
+ "loss": 1.5823,
48
  "step": 20
49
  },
50
  {
51
  "epoch": 4.0,
52
+ "eval_accuracy": 0.1774193548387097,
53
+ "eval_loss": 1.2754011154174805,
54
+ "eval_runtime": 2.3296,
55
+ "eval_samples_per_second": 26.614,
56
+ "eval_steps_per_second": 1.717,
57
  "step": 22
58
  },
59
  {
60
  "epoch": 4.91,
61
+ "eval_accuracy": 0.5645161290322581,
62
+ "eval_loss": 1.1454869508743286,
63
+ "eval_runtime": 2.374,
64
+ "eval_samples_per_second": 26.117,
65
+ "eval_steps_per_second": 1.685,
66
  "step": 27
67
  },
68
  {
69
  "epoch": 5.45,
70
+ "learning_rate": 4.722222222222222e-05,
71
+ "loss": 1.27,
72
  "step": 30
73
  },
74
  {
75
  "epoch": 6.0,
76
+ "eval_accuracy": 0.6290322580645161,
77
+ "eval_loss": 1.0146747827529907,
78
+ "eval_runtime": 2.4966,
79
+ "eval_samples_per_second": 24.834,
80
+ "eval_steps_per_second": 1.602,
81
  "step": 33
82
  },
83
  {
84
  "epoch": 6.91,
85
+ "eval_accuracy": 0.5483870967741935,
86
+ "eval_loss": 0.9789792895317078,
87
+ "eval_runtime": 2.3346,
88
+ "eval_samples_per_second": 26.557,
89
+ "eval_steps_per_second": 1.713,
90
  "step": 38
91
  },
92
  {
93
  "epoch": 7.27,
94
+ "learning_rate": 4.4444444444444447e-05,
95
+ "loss": 1.079,
96
  "step": 40
97
  },
98
  {
99
  "epoch": 8.0,
100
+ "eval_accuracy": 0.45161290322580644,
101
+ "eval_loss": 1.0473968982696533,
102
+ "eval_runtime": 2.3881,
103
+ "eval_samples_per_second": 25.962,
104
+ "eval_steps_per_second": 1.675,
105
  "step": 44
106
  },
107
  {
108
  "epoch": 8.91,
109
+ "eval_accuracy": 0.7580645161290323,
110
+ "eval_loss": 0.8796324729919434,
111
+ "eval_runtime": 2.3814,
112
+ "eval_samples_per_second": 26.035,
113
+ "eval_steps_per_second": 1.68,
114
  "step": 49
115
  },
116
  {
117
  "epoch": 9.09,
118
+ "learning_rate": 4.166666666666667e-05,
119
+ "loss": 1.005,
120
  "step": 50
121
  },
122
  {
123
  "epoch": 10.0,
124
+ "eval_accuracy": 0.7741935483870968,
125
+ "eval_loss": 0.7759426832199097,
126
+ "eval_runtime": 2.3246,
127
+ "eval_samples_per_second": 26.672,
128
+ "eval_steps_per_second": 1.721,
129
  "step": 55
130
  },
131
  {
132
  "epoch": 10.91,
133
+ "learning_rate": 3.888888888888889e-05,
134
+ "loss": 0.8479,
135
  "step": 60
136
  },
137
  {
138
  "epoch": 10.91,
139
+ "eval_accuracy": 0.8225806451612904,
140
+ "eval_loss": 0.7421430349349976,
141
+ "eval_runtime": 2.3181,
142
+ "eval_samples_per_second": 26.747,
143
+ "eval_steps_per_second": 1.726,
144
  "step": 60
145
  },
146
  {
147
  "epoch": 12.0,
148
+ "eval_accuracy": 0.8548387096774194,
149
+ "eval_loss": 0.6759869456291199,
150
+ "eval_runtime": 2.3471,
151
+ "eval_samples_per_second": 26.416,
152
+ "eval_steps_per_second": 1.704,
153
  "step": 66
154
  },
155
  {
156
  "epoch": 12.73,
157
+ "learning_rate": 3.611111111111111e-05,
158
+ "loss": 0.7695,
159
  "step": 70
160
  },
161
  {
162
  "epoch": 12.91,
163
+ "eval_accuracy": 0.8387096774193549,
164
+ "eval_loss": 0.5932707190513611,
165
+ "eval_runtime": 2.3526,
166
+ "eval_samples_per_second": 26.354,
167
+ "eval_steps_per_second": 1.7,
168
  "step": 71
169
  },
170
  {
171
  "epoch": 14.0,
172
+ "eval_accuracy": 0.7741935483870968,
173
+ "eval_loss": 0.6372359991073608,
174
+ "eval_runtime": 2.2975,
175
+ "eval_samples_per_second": 26.985,
176
+ "eval_steps_per_second": 1.741,
177
  "step": 77
178
  },
179
  {
180
  "epoch": 14.55,
181
+ "learning_rate": 3.3333333333333335e-05,
182
+ "loss": 0.6591,
183
  "step": 80
184
  },
185
  {
186
  "epoch": 14.91,
187
+ "eval_accuracy": 0.8387096774193549,
188
+ "eval_loss": 0.5653398633003235,
189
+ "eval_runtime": 2.2901,
190
+ "eval_samples_per_second": 27.074,
191
+ "eval_steps_per_second": 1.747,
192
  "step": 82
193
  },
194
  {
195
  "epoch": 16.0,
196
+ "eval_accuracy": 0.8709677419354839,
197
+ "eval_loss": 0.4950495958328247,
198
+ "eval_runtime": 2.4381,
199
+ "eval_samples_per_second": 25.43,
200
+ "eval_steps_per_second": 1.641,
201
  "step": 88
202
  },
203
  {
204
  "epoch": 16.36,
205
+ "learning_rate": 3.055555555555556e-05,
206
+ "loss": 0.5675,
207
  "step": 90
208
  },
209
  {
210
  "epoch": 16.91,
211
+ "eval_accuracy": 0.8225806451612904,
212
+ "eval_loss": 0.5040029287338257,
213
+ "eval_runtime": 2.3556,
214
+ "eval_samples_per_second": 26.321,
215
+ "eval_steps_per_second": 1.698,
216
  "step": 93
217
  },
218
  {
219
  "epoch": 18.0,
220
+ "eval_accuracy": 0.9032258064516129,
221
+ "eval_loss": 0.4273931682109833,
222
+ "eval_runtime": 2.3296,
223
+ "eval_samples_per_second": 26.614,
224
+ "eval_steps_per_second": 1.717,
225
  "step": 99
226
  },
227
  {
228
  "epoch": 18.18,
229
+ "learning_rate": 2.777777777777778e-05,
230
+ "loss": 0.5134,
231
  "step": 100
232
  },
233
  {
234
  "epoch": 18.91,
235
+ "eval_accuracy": 0.8548387096774194,
236
+ "eval_loss": 0.461721807718277,
237
+ "eval_runtime": 2.4876,
238
+ "eval_samples_per_second": 24.923,
239
+ "eval_steps_per_second": 1.608,
240
  "step": 104
241
  },
242
  {
243
  "epoch": 20.0,
244
+ "learning_rate": 2.5e-05,
245
+ "loss": 0.4418,
246
  "step": 110
247
  },
248
  {
249
  "epoch": 20.0,
250
+ "eval_accuracy": 0.8870967741935484,
251
+ "eval_loss": 0.4244731366634369,
252
+ "eval_runtime": 2.3501,
253
+ "eval_samples_per_second": 26.382,
254
+ "eval_steps_per_second": 1.702,
255
  "step": 110
256
  },
257
  {
258
  "epoch": 20.91,
259
+ "eval_accuracy": 0.8387096774193549,
260
+ "eval_loss": 0.4922010600566864,
261
+ "eval_runtime": 2.4186,
262
+ "eval_samples_per_second": 25.635,
263
+ "eval_steps_per_second": 1.654,
264
  "step": 115
265
  },
266
  {
267
  "epoch": 21.82,
268
+ "learning_rate": 2.2222222222222223e-05,
269
+ "loss": 0.402,
270
  "step": 120
271
  },
272
  {
273
  "epoch": 22.0,
274
+ "eval_accuracy": 0.8225806451612904,
275
+ "eval_loss": 0.5112457275390625,
276
+ "eval_runtime": 2.4655,
277
+ "eval_samples_per_second": 25.147,
278
+ "eval_steps_per_second": 1.622,
279
  "step": 121
280
  },
281
  {
282
  "epoch": 22.91,
283
+ "eval_accuracy": 0.8548387096774194,
284
+ "eval_loss": 0.46961790323257446,
285
+ "eval_runtime": 2.2816,
286
+ "eval_samples_per_second": 27.174,
287
+ "eval_steps_per_second": 1.753,
288
  "step": 126
289
  },
290
  {
291
  "epoch": 23.64,
292
+ "learning_rate": 1.9444444444444445e-05,
293
+ "loss": 0.4039,
294
  "step": 130
295
  },
296
  {
297
  "epoch": 24.0,
298
+ "eval_accuracy": 0.8709677419354839,
299
+ "eval_loss": 0.40138500928878784,
300
+ "eval_runtime": 2.3996,
301
+ "eval_samples_per_second": 25.838,
302
+ "eval_steps_per_second": 1.667,
303
  "step": 132
304
  },
305
  {
306
  "epoch": 24.91,
307
+ "eval_accuracy": 0.8225806451612904,
308
+ "eval_loss": 0.5005894899368286,
309
+ "eval_runtime": 2.3451,
310
+ "eval_samples_per_second": 26.439,
311
+ "eval_steps_per_second": 1.706,
312
  "step": 137
313
  },
314
  {
315
  "epoch": 25.45,
316
+ "learning_rate": 1.6666666666666667e-05,
317
+ "loss": 0.4216,
318
  "step": 140
319
  },
320
  {
321
  "epoch": 26.0,
322
+ "eval_accuracy": 0.8548387096774194,
323
+ "eval_loss": 0.5351113080978394,
324
+ "eval_runtime": 2.4946,
325
+ "eval_samples_per_second": 24.853,
326
+ "eval_steps_per_second": 1.603,
327
  "step": 143
328
  },
329
  {
330
  "epoch": 26.91,
331
+ "eval_accuracy": 0.8548387096774194,
332
+ "eval_loss": 0.5202920436859131,
333
+ "eval_runtime": 2.3236,
334
+ "eval_samples_per_second": 26.683,
335
+ "eval_steps_per_second": 1.721,
336
  "step": 148
337
  },
338
  {
339
  "epoch": 27.27,
340
+ "learning_rate": 1.388888888888889e-05,
341
+ "loss": 0.3593,
342
  "step": 150
343
  },
344
  {
345
  "epoch": 28.0,
346
+ "eval_accuracy": 0.8548387096774194,
347
+ "eval_loss": 0.4081813395023346,
348
+ "eval_runtime": 2.3351,
349
+ "eval_samples_per_second": 26.552,
350
+ "eval_steps_per_second": 1.713,
351
  "step": 154
352
  },
353
  {
354
  "epoch": 28.91,
355
+ "eval_accuracy": 0.8709677419354839,
356
+ "eval_loss": 0.40170377492904663,
357
+ "eval_runtime": 2.3078,
358
+ "eval_samples_per_second": 26.865,
359
+ "eval_steps_per_second": 1.733,
360
  "step": 159
361
  },
362
  {
363
  "epoch": 29.09,
364
+ "learning_rate": 1.1111111111111112e-05,
365
+ "loss": 0.3638,
366
  "step": 160
367
  },
368
  {
369
  "epoch": 30.0,
370
+ "eval_accuracy": 0.8870967741935484,
371
+ "eval_loss": 0.40682506561279297,
372
+ "eval_runtime": 2.5896,
373
+ "eval_samples_per_second": 23.942,
374
+ "eval_steps_per_second": 1.545,
375
  "step": 165
376
  },
377
  {
378
  "epoch": 30.91,
379
+ "learning_rate": 8.333333333333334e-06,
380
+ "loss": 0.3509,
381
  "step": 170
382
  },
383
  {
384
  "epoch": 30.91,
385
+ "eval_accuracy": 0.8870967741935484,
386
+ "eval_loss": 0.3990916311740875,
387
+ "eval_runtime": 2.75,
388
+ "eval_samples_per_second": 22.546,
389
+ "eval_steps_per_second": 1.455,
390
  "step": 170
391
  },
392
  {
393
  "epoch": 32.0,
394
+ "eval_accuracy": 0.8709677419354839,
395
+ "eval_loss": 0.3964940309524536,
396
+ "eval_runtime": 2.3794,
397
+ "eval_samples_per_second": 26.056,
398
  "eval_steps_per_second": 1.681,
399
  "step": 176
400
  },
401
  {
402
  "epoch": 32.73,
403
+ "learning_rate": 5.555555555555556e-06,
404
+ "loss": 0.3426,
405
  "step": 180
406
  },
407
  {
408
  "epoch": 32.91,
409
+ "eval_accuracy": 0.8709677419354839,
410
+ "eval_loss": 0.3921041190624237,
411
+ "eval_runtime": 2.3356,
412
+ "eval_samples_per_second": 26.546,
413
+ "eval_steps_per_second": 1.713,
414
  "step": 181
415
  },
416
  {
417
  "epoch": 34.0,
418
+ "eval_accuracy": 0.8709677419354839,
419
+ "eval_loss": 0.39981809258461,
420
+ "eval_runtime": 2.4784,
421
+ "eval_samples_per_second": 25.016,
422
+ "eval_steps_per_second": 1.614,
423
  "step": 187
424
  },
425
  {
426
  "epoch": 34.55,
427
+ "learning_rate": 2.777777777777778e-06,
428
+ "loss": 0.3253,
429
  "step": 190
430
  },
431
  {
432
  "epoch": 34.91,
433
+ "eval_accuracy": 0.8870967741935484,
434
+ "eval_loss": 0.41015544533729553,
435
+ "eval_runtime": 2.3626,
436
+ "eval_samples_per_second": 26.243,
437
+ "eval_steps_per_second": 1.693,
438
  "step": 192
439
  },
440
  {
441
  "epoch": 36.0,
442
+ "eval_accuracy": 0.8870967741935484,
443
+ "eval_loss": 0.4080694913864136,
444
+ "eval_runtime": 2.3858,
445
+ "eval_samples_per_second": 25.987,
446
+ "eval_steps_per_second": 1.677,
447
  "step": 198
448
  },
449
  {
450
  "epoch": 36.36,
451
  "learning_rate": 0.0,
452
+ "loss": 0.3085,
453
  "step": 200
454
  },
455
  {
456
  "epoch": 36.36,
457
+ "eval_accuracy": 0.8870967741935484,
458
+ "eval_loss": 0.40834033489227295,
459
+ "eval_runtime": 2.3711,
460
+ "eval_samples_per_second": 26.149,
461
+ "eval_steps_per_second": 1.687,
462
  "step": 200
463
  },
464
  {
465
  "epoch": 36.36,
466
  "step": 200,
467
  "total_flos": 9.859141601338982e+17,
468
+ "train_loss": 0.6893912732601166,
469
+ "train_runtime": 709.0628,
470
+ "train_samples_per_second": 19.744,
471
+ "train_steps_per_second": 0.282
472
  }
473
  ],
474
  "logging_steps": 10,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0b6f66eb1a9e3f3c50c5a51c5595513566317820c38ec61d1d6b26f52be64b51
3
  size 4728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9485465125f9e78e6dd67b509633fd1622ddef7452bb952f0688e13435966aee
3
  size 4728