moock committed on
Commit
05a07a9
1 Parent(s): a2bcce8

End of training

Browse files
README.md CHANGED
@@ -22,7 +22,7 @@ model-index:
22
  metrics:
23
  - name: Accuracy
24
  type: accuracy
25
- value: 0.8355704697986577
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -32,12 +32,12 @@ should probably proofread and complete it, then remove this comment. -->
32
 
33
  This model is a fine-tuned version of [microsoft/swinv2-tiny-patch4-window8-256](https://huggingface.co/microsoft/swinv2-tiny-patch4-window8-256) on the imagefolder dataset.
34
  It achieves the following results on the evaluation set:
35
- - Loss: 0.5500
36
- - Accuracy: 0.8356
37
 
38
  ## Model description
39
 
40
- Predict Expansion Grade - Gardner Score from an embryo image
41
 
42
  ## Intended uses & limitations
43
 
@@ -61,27 +61,37 @@ The following hyperparameters were used during training:
61
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
62
  - lr_scheduler_type: linear
63
  - lr_scheduler_warmup_ratio: 0.1
64
- - num_epochs: 15
65
 
66
  ### Training results
67
 
68
  | Training Loss | Epoch | Step | Validation Loss | Accuracy |
69
  |:-------------:|:-----:|:----:|:---------------:|:--------:|
70
- | 1.6043 | 0.97 | 14 | 1.5288 | 0.5415 |
71
- | 1.4967 | 2.0 | 29 | 1.1719 | 0.5415 |
72
- | 1.1276 | 2.97 | 43 | 1.0525 | 0.5463 |
73
- | 1.0796 | 4.0 | 58 | 0.9086 | 0.6537 |
74
- | 0.9387 | 4.97 | 72 | 0.8500 | 0.6439 |
75
- | 0.9232 | 6.0 | 87 | 0.8190 | 0.6732 |
76
- | 0.8456 | 6.97 | 101 | 0.8042 | 0.6878 |
77
- | 0.8348 | 8.0 | 116 | 0.7770 | 0.6927 |
78
- | 0.8057 | 8.97 | 130 | 0.7457 | 0.7073 |
79
- | 0.8033 | 10.0 | 145 | 0.7353 | 0.7024 |
80
- | 0.7822 | 10.97 | 159 | 0.7166 | 0.7122 |
81
- | 0.7594 | 12.0 | 174 | 0.7188 | 0.7171 |
82
- | 0.7777 | 12.97 | 188 | 0.7086 | 0.7171 |
83
- | 0.7445 | 14.0 | 203 | 0.7139 | 0.6878 |
84
- | 0.7513 | 14.48 | 210 | 0.7139 | 0.6878 |
 
 
 
 
 
 
 
 
 
 
85
 
86
 
87
  ### Framework versions
 
22
  metrics:
23
  - name: Accuracy
24
  type: accuracy
25
+ value: 0.8389261744966443
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
32
 
33
  This model is a fine-tuned version of [microsoft/swinv2-tiny-patch4-window8-256](https://huggingface.co/microsoft/swinv2-tiny-patch4-window8-256) on the imagefolder dataset.
34
  It achieves the following results on the evaluation set:
35
+ - Loss: 0.5312
36
+ - Accuracy: 0.8389
37
 
38
  ## Model description
39
 
40
+ More information needed
41
 
42
  ## Intended uses & limitations
43
 
 
61
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
62
  - lr_scheduler_type: linear
63
  - lr_scheduler_warmup_ratio: 0.1
64
+ - num_epochs: 25
65
 
66
  ### Training results
67
 
68
  | Training Loss | Epoch | Step | Validation Loss | Accuracy |
69
  |:-------------:|:-----:|:----:|:---------------:|:--------:|
70
+ | 1.6068 | 0.97 | 14 | 1.5809 | 0.5415 |
71
+ | 1.56 | 2.0 | 29 | 1.2830 | 0.5415 |
72
+ | 1.1852 | 2.97 | 43 | 1.0794 | 0.5415 |
73
+ | 1.1132 | 4.0 | 58 | 0.9314 | 0.6488 |
74
+ | 0.9416 | 4.97 | 72 | 0.8935 | 0.6341 |
75
+ | 0.9143 | 6.0 | 87 | 0.8009 | 0.6829 |
76
+ | 0.8243 | 6.97 | 101 | 0.8067 | 0.6634 |
77
+ | 0.8171 | 8.0 | 116 | 0.7783 | 0.6780 |
78
+ | 0.7901 | 8.97 | 130 | 0.7871 | 0.6585 |
79
+ | 0.7944 | 10.0 | 145 | 0.7414 | 0.6976 |
80
+ | 0.7669 | 10.97 | 159 | 0.6977 | 0.7122 |
81
+ | 0.7478 | 12.0 | 174 | 0.7043 | 0.7122 |
82
+ | 0.766 | 12.97 | 188 | 0.7778 | 0.6585 |
83
+ | 0.7322 | 14.0 | 203 | 0.7504 | 0.6780 |
84
+ | 0.7242 | 14.97 | 217 | 0.7291 | 0.6829 |
85
+ | 0.7554 | 16.0 | 232 | 0.7694 | 0.6634 |
86
+ | 0.7422 | 16.97 | 246 | 0.7569 | 0.6829 |
87
+ | 0.7292 | 18.0 | 261 | 0.7389 | 0.6780 |
88
+ | 0.7354 | 18.97 | 275 | 0.6684 | 0.7122 |
89
+ | 0.6847 | 20.0 | 290 | 0.6821 | 0.7122 |
90
+ | 0.7231 | 20.97 | 304 | 0.6839 | 0.7024 |
91
+ | 0.6962 | 22.0 | 319 | 0.6958 | 0.6878 |
92
+ | 0.7079 | 22.97 | 333 | 0.7039 | 0.6878 |
93
+ | 0.7088 | 24.0 | 348 | 0.6974 | 0.6878 |
94
+ | 0.7106 | 24.14 | 350 | 0.6975 | 0.6878 |
95
 
96
 
97
  ### Framework versions
all_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
- "epoch": 14.48,
3
- "eval_accuracy": 0.8355704697986577,
4
- "eval_loss": 0.5500471591949463,
5
- "eval_runtime": 5.0985,
6
- "eval_samples_per_second": 58.448,
7
- "eval_steps_per_second": 1.961,
8
- "train_loss": 0.9456698463076637,
9
- "train_runtime": 1102.737,
10
- "train_samples_per_second": 25.015,
11
- "train_steps_per_second": 0.19
12
  }
 
1
  {
2
+ "epoch": 24.14,
3
+ "eval_accuracy": 0.8389261744966443,
4
+ "eval_loss": 0.5312080979347229,
5
+ "eval_runtime": 6.5357,
6
+ "eval_samples_per_second": 45.596,
7
+ "eval_steps_per_second": 1.53,
8
+ "train_loss": 0.8573012270246233,
9
+ "train_runtime": 2334.0273,
10
+ "train_samples_per_second": 19.698,
11
+ "train_steps_per_second": 0.15
12
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 14.48,
3
- "eval_accuracy": 0.8355704697986577,
4
- "eval_loss": 0.5500471591949463,
5
- "eval_runtime": 5.0985,
6
- "eval_samples_per_second": 58.448,
7
- "eval_steps_per_second": 1.961
8
  }
 
1
  {
2
+ "epoch": 24.14,
3
+ "eval_accuracy": 0.8389261744966443,
4
+ "eval_loss": 0.5312080979347229,
5
+ "eval_runtime": 6.5357,
6
+ "eval_samples_per_second": 45.596,
7
+ "eval_steps_per_second": 1.53
8
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0ba5d11712824f6d4f6f330378764fd34aae7206d7f052e681a25944638ca39e
3
  size 110359372
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3efa52dfd3f432fcd7e64448044d880f0ab3510db098be6c7896a1dadb4c6fd3
3
  size 110359372
runs/Dec28_00-52-56_MacBook-Pro-de-Max-2.local/events.out.tfevents.1703721182.MacBook-Pro-de-Max-2.local.31343.9 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ec33d0976e9fa7bb349d40c55b93cb4f7052e406b8a42eb4765391a2702541e
3
+ size 18671
runs/Dec28_00-52-56_MacBook-Pro-de-Max-2.local/events.out.tfevents.1703723525.MacBook-Pro-de-Max-2.local.31343.10 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29da514cabfbb21c5dcf1d41eef3eb7d18dfd601dbc797059b7dcc153dc93f68
3
+ size 734
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 14.48,
3
- "train_loss": 0.9456698463076637,
4
- "train_runtime": 1102.737,
5
- "train_samples_per_second": 25.015,
6
- "train_steps_per_second": 0.19
7
  }
 
1
  {
2
+ "epoch": 24.14,
3
+ "train_loss": 0.8573012270246233,
4
+ "train_runtime": 2334.0273,
5
+ "train_samples_per_second": 19.698,
6
+ "train_steps_per_second": 0.15
7
  }
trainer_state.json CHANGED
@@ -1,290 +1,464 @@
1
  {
2
- "best_metric": 0.7170731707317073,
3
- "best_model_checkpoint": "swinv2-tiny-patch4-window8-256-finetuned-gardner-exp-max/checkpoint-174",
4
- "epoch": 14.482758620689655,
5
  "eval_steps": 500,
6
- "global_step": 210,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.69,
13
- "learning_rate": 2.380952380952381e-05,
14
- "loss": 1.6043,
15
  "step": 10
16
  },
17
  {
18
  "epoch": 0.97,
19
  "eval_accuracy": 0.5414634146341464,
20
- "eval_loss": 1.5287535190582275,
21
- "eval_runtime": 3.5003,
22
- "eval_samples_per_second": 58.567,
23
- "eval_steps_per_second": 2.0,
24
  "step": 14
25
  },
26
  {
27
  "epoch": 1.38,
28
- "learning_rate": 4.761904761904762e-05,
29
- "loss": 1.4967,
30
  "step": 20
31
  },
32
  {
33
  "epoch": 2.0,
34
  "eval_accuracy": 0.5414634146341464,
35
- "eval_loss": 1.1718776226043701,
36
- "eval_runtime": 3.523,
37
- "eval_samples_per_second": 58.189,
38
- "eval_steps_per_second": 1.987,
39
  "step": 29
40
  },
41
  {
42
  "epoch": 2.07,
43
- "learning_rate": 4.761904761904762e-05,
44
- "loss": 1.2615,
45
  "step": 30
46
  },
47
  {
48
  "epoch": 2.76,
49
- "learning_rate": 4.4973544973544974e-05,
50
- "loss": 1.1276,
51
  "step": 40
52
  },
53
  {
54
  "epoch": 2.97,
55
- "eval_accuracy": 0.5463414634146342,
56
- "eval_loss": 1.052482008934021,
57
- "eval_runtime": 3.482,
58
- "eval_samples_per_second": 58.874,
59
- "eval_steps_per_second": 2.01,
60
  "step": 43
61
  },
62
  {
63
  "epoch": 3.45,
64
- "learning_rate": 4.232804232804233e-05,
65
- "loss": 1.0796,
66
  "step": 50
67
  },
68
  {
69
  "epoch": 4.0,
70
- "eval_accuracy": 0.6536585365853659,
71
- "eval_loss": 0.9085918664932251,
72
- "eval_runtime": 3.5324,
73
- "eval_samples_per_second": 58.034,
74
- "eval_steps_per_second": 1.982,
75
  "step": 58
76
  },
77
  {
78
  "epoch": 4.14,
79
- "learning_rate": 3.968253968253968e-05,
80
- "loss": 1.0019,
81
  "step": 60
82
  },
83
  {
84
  "epoch": 4.83,
85
- "learning_rate": 3.7037037037037037e-05,
86
- "loss": 0.9387,
87
  "step": 70
88
  },
89
  {
90
  "epoch": 4.97,
91
- "eval_accuracy": 0.6439024390243903,
92
- "eval_loss": 0.8500024676322937,
93
- "eval_runtime": 3.5195,
94
- "eval_samples_per_second": 58.246,
95
- "eval_steps_per_second": 1.989,
96
  "step": 72
97
  },
98
  {
99
  "epoch": 5.52,
100
- "learning_rate": 3.439153439153439e-05,
101
- "loss": 0.9232,
102
  "step": 80
103
  },
104
  {
105
  "epoch": 6.0,
106
- "eval_accuracy": 0.6731707317073171,
107
- "eval_loss": 0.8189888000488281,
108
- "eval_runtime": 3.532,
109
- "eval_samples_per_second": 58.041,
110
- "eval_steps_per_second": 1.982,
111
  "step": 87
112
  },
113
  {
114
  "epoch": 6.21,
115
- "learning_rate": 3.1746031746031745e-05,
116
- "loss": 0.9018,
117
  "step": 90
118
  },
119
  {
120
  "epoch": 6.9,
121
- "learning_rate": 2.91005291005291e-05,
122
- "loss": 0.8456,
123
  "step": 100
124
  },
125
  {
126
  "epoch": 6.97,
127
- "eval_accuracy": 0.6878048780487804,
128
- "eval_loss": 0.8041830062866211,
129
- "eval_runtime": 3.4986,
130
- "eval_samples_per_second": 58.594,
131
- "eval_steps_per_second": 2.001,
132
  "step": 101
133
  },
134
  {
135
  "epoch": 7.59,
136
- "learning_rate": 2.6455026455026456e-05,
137
- "loss": 0.8348,
138
  "step": 110
139
  },
140
  {
141
  "epoch": 8.0,
142
- "eval_accuracy": 0.6926829268292682,
143
- "eval_loss": 0.7769930362701416,
144
- "eval_runtime": 3.5012,
145
- "eval_samples_per_second": 58.552,
146
- "eval_steps_per_second": 1.999,
147
  "step": 116
148
  },
149
  {
150
  "epoch": 8.28,
151
- "learning_rate": 2.380952380952381e-05,
152
- "loss": 0.8287,
153
  "step": 120
154
  },
155
  {
156
  "epoch": 8.97,
157
- "learning_rate": 2.1164021164021164e-05,
158
- "loss": 0.8057,
159
  "step": 130
160
  },
161
  {
162
  "epoch": 8.97,
163
- "eval_accuracy": 0.7073170731707317,
164
- "eval_loss": 0.7456830143928528,
165
- "eval_runtime": 3.4827,
166
- "eval_samples_per_second": 58.862,
167
- "eval_steps_per_second": 2.01,
168
  "step": 130
169
  },
170
  {
171
  "epoch": 9.66,
172
- "learning_rate": 1.8518518518518518e-05,
173
- "loss": 0.8033,
174
  "step": 140
175
  },
176
  {
177
  "epoch": 10.0,
178
- "eval_accuracy": 0.7024390243902439,
179
- "eval_loss": 0.7352668642997742,
180
- "eval_runtime": 3.5384,
181
- "eval_samples_per_second": 57.935,
182
- "eval_steps_per_second": 1.978,
183
  "step": 145
184
  },
185
  {
186
  "epoch": 10.34,
187
- "learning_rate": 1.5873015873015872e-05,
188
- "loss": 0.7822,
189
  "step": 150
190
  },
191
  {
192
  "epoch": 10.97,
193
  "eval_accuracy": 0.7121951219512195,
194
- "eval_loss": 0.7165755033493042,
195
- "eval_runtime": 3.4957,
196
- "eval_samples_per_second": 58.643,
197
- "eval_steps_per_second": 2.002,
198
  "step": 159
199
  },
200
  {
201
  "epoch": 11.03,
202
- "learning_rate": 1.3227513227513228e-05,
203
- "loss": 0.8041,
204
  "step": 160
205
  },
206
  {
207
  "epoch": 11.72,
208
- "learning_rate": 1.0582010582010582e-05,
209
- "loss": 0.7594,
210
  "step": 170
211
  },
212
  {
213
  "epoch": 12.0,
214
- "eval_accuracy": 0.7170731707317073,
215
- "eval_loss": 0.718829333782196,
216
- "eval_runtime": 3.5638,
217
- "eval_samples_per_second": 57.522,
218
- "eval_steps_per_second": 1.964,
219
  "step": 174
220
  },
221
  {
222
  "epoch": 12.41,
223
- "learning_rate": 7.936507936507936e-06,
224
- "loss": 0.7777,
225
  "step": 180
226
  },
227
  {
228
  "epoch": 12.97,
229
- "eval_accuracy": 0.7170731707317073,
230
- "eval_loss": 0.7086274027824402,
231
- "eval_runtime": 3.4886,
232
- "eval_samples_per_second": 58.762,
233
- "eval_steps_per_second": 2.007,
234
  "step": 188
235
  },
236
  {
237
  "epoch": 13.1,
238
- "learning_rate": 5.291005291005291e-06,
239
- "loss": 0.7863,
240
  "step": 190
241
  },
242
  {
243
  "epoch": 13.79,
244
- "learning_rate": 2.6455026455026455e-06,
245
- "loss": 0.7445,
246
  "step": 200
247
  },
248
  {
249
  "epoch": 14.0,
250
- "eval_accuracy": 0.6878048780487804,
251
- "eval_loss": 0.7139347791671753,
252
- "eval_runtime": 3.4798,
253
- "eval_samples_per_second": 58.912,
254
- "eval_steps_per_second": 2.012,
255
  "step": 203
256
  },
257
  {
258
  "epoch": 14.48,
259
- "learning_rate": 0.0,
260
- "loss": 0.7513,
261
  "step": 210
262
  },
263
  {
264
- "epoch": 14.48,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
265
  "eval_accuracy": 0.6878048780487804,
266
- "eval_loss": 0.7138883471488953,
267
- "eval_runtime": 3.48,
268
- "eval_samples_per_second": 58.908,
269
- "eval_steps_per_second": 2.012,
270
- "step": 210
271
  },
272
  {
273
- "epoch": 14.48,
274
- "step": 210,
275
- "total_flos": 8.6685208493713e+17,
276
- "train_loss": 0.9456698463076637,
277
- "train_runtime": 1102.737,
278
- "train_samples_per_second": 25.015,
279
- "train_steps_per_second": 0.19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
280
  }
281
  ],
282
  "logging_steps": 10,
283
- "max_steps": 210,
284
  "num_input_tokens_seen": 0,
285
- "num_train_epochs": 15,
286
  "save_steps": 500,
287
- "total_flos": 8.6685208493713e+17,
288
  "train_batch_size": 32,
289
  "trial_name": null,
290
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.7121951219512195,
3
+ "best_model_checkpoint": "swinv2-tiny-patch4-window8-256-finetuned-gardner-exp-max/checkpoint-159",
4
+ "epoch": 24.137931034482758,
5
  "eval_steps": 500,
6
+ "global_step": 350,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.69,
13
+ "learning_rate": 1.4285714285714285e-05,
14
+ "loss": 1.6068,
15
  "step": 10
16
  },
17
  {
18
  "epoch": 0.97,
19
  "eval_accuracy": 0.5414634146341464,
20
+ "eval_loss": 1.5808794498443604,
21
+ "eval_runtime": 3.5265,
22
+ "eval_samples_per_second": 58.131,
23
+ "eval_steps_per_second": 1.985,
24
  "step": 14
25
  },
26
  {
27
  "epoch": 1.38,
28
+ "learning_rate": 2.857142857142857e-05,
29
+ "loss": 1.56,
30
  "step": 20
31
  },
32
  {
33
  "epoch": 2.0,
34
  "eval_accuracy": 0.5414634146341464,
35
+ "eval_loss": 1.2830290794372559,
36
+ "eval_runtime": 3.4914,
37
+ "eval_samples_per_second": 58.716,
38
+ "eval_steps_per_second": 2.005,
39
  "step": 29
40
  },
41
  {
42
  "epoch": 2.07,
43
+ "learning_rate": 4.2857142857142856e-05,
44
+ "loss": 1.3595,
45
  "step": 30
46
  },
47
  {
48
  "epoch": 2.76,
49
+ "learning_rate": 4.9206349206349204e-05,
50
+ "loss": 1.1852,
51
  "step": 40
52
  },
53
  {
54
  "epoch": 2.97,
55
+ "eval_accuracy": 0.5414634146341464,
56
+ "eval_loss": 1.0793886184692383,
57
+ "eval_runtime": 3.5623,
58
+ "eval_samples_per_second": 57.547,
59
+ "eval_steps_per_second": 1.965,
60
  "step": 43
61
  },
62
  {
63
  "epoch": 3.45,
64
+ "learning_rate": 4.761904761904762e-05,
65
+ "loss": 1.1132,
66
  "step": 50
67
  },
68
  {
69
  "epoch": 4.0,
70
+ "eval_accuracy": 0.6487804878048781,
71
+ "eval_loss": 0.9314356446266174,
72
+ "eval_runtime": 3.4589,
73
+ "eval_samples_per_second": 59.268,
74
+ "eval_steps_per_second": 2.024,
75
  "step": 58
76
  },
77
  {
78
  "epoch": 4.14,
79
+ "learning_rate": 4.603174603174603e-05,
80
+ "loss": 1.024,
81
  "step": 60
82
  },
83
  {
84
  "epoch": 4.83,
85
+ "learning_rate": 4.4444444444444447e-05,
86
+ "loss": 0.9416,
87
  "step": 70
88
  },
89
  {
90
  "epoch": 4.97,
91
+ "eval_accuracy": 0.6341463414634146,
92
+ "eval_loss": 0.8935254216194153,
93
+ "eval_runtime": 3.4888,
94
+ "eval_samples_per_second": 58.759,
95
+ "eval_steps_per_second": 2.006,
96
  "step": 72
97
  },
98
  {
99
  "epoch": 5.52,
100
+ "learning_rate": 4.2857142857142856e-05,
101
+ "loss": 0.9143,
102
  "step": 80
103
  },
104
  {
105
  "epoch": 6.0,
106
+ "eval_accuracy": 0.6829268292682927,
107
+ "eval_loss": 0.8009229898452759,
108
+ "eval_runtime": 3.465,
109
+ "eval_samples_per_second": 59.163,
110
+ "eval_steps_per_second": 2.02,
111
  "step": 87
112
  },
113
  {
114
  "epoch": 6.21,
115
+ "learning_rate": 4.126984126984127e-05,
116
+ "loss": 0.8868,
117
  "step": 90
118
  },
119
  {
120
  "epoch": 6.9,
121
+ "learning_rate": 3.968253968253968e-05,
122
+ "loss": 0.8243,
123
  "step": 100
124
  },
125
  {
126
  "epoch": 6.97,
127
+ "eval_accuracy": 0.6634146341463415,
128
+ "eval_loss": 0.8067137002944946,
129
+ "eval_runtime": 3.5324,
130
+ "eval_samples_per_second": 58.034,
131
+ "eval_steps_per_second": 1.982,
132
  "step": 101
133
  },
134
  {
135
  "epoch": 7.59,
136
+ "learning_rate": 3.809523809523809e-05,
137
+ "loss": 0.8171,
138
  "step": 110
139
  },
140
  {
141
  "epoch": 8.0,
142
+ "eval_accuracy": 0.6780487804878049,
143
+ "eval_loss": 0.7783121466636658,
144
+ "eval_runtime": 3.4599,
145
+ "eval_samples_per_second": 59.25,
146
+ "eval_steps_per_second": 2.023,
147
  "step": 116
148
  },
149
  {
150
  "epoch": 8.28,
151
+ "learning_rate": 3.650793650793651e-05,
152
+ "loss": 0.8161,
153
  "step": 120
154
  },
155
  {
156
  "epoch": 8.97,
157
+ "learning_rate": 3.492063492063492e-05,
158
+ "loss": 0.7901,
159
  "step": 130
160
  },
161
  {
162
  "epoch": 8.97,
163
+ "eval_accuracy": 0.6585365853658537,
164
+ "eval_loss": 0.7871080636978149,
165
+ "eval_runtime": 3.4561,
166
+ "eval_samples_per_second": 59.316,
167
+ "eval_steps_per_second": 2.025,
168
  "step": 130
169
  },
170
  {
171
  "epoch": 9.66,
172
+ "learning_rate": 3.3333333333333335e-05,
173
+ "loss": 0.7944,
174
  "step": 140
175
  },
176
  {
177
  "epoch": 10.0,
178
+ "eval_accuracy": 0.697560975609756,
179
+ "eval_loss": 0.7413551211357117,
180
+ "eval_runtime": 3.4339,
181
+ "eval_samples_per_second": 59.699,
182
+ "eval_steps_per_second": 2.038,
183
  "step": 145
184
  },
185
  {
186
  "epoch": 10.34,
187
+ "learning_rate": 3.1746031746031745e-05,
188
+ "loss": 0.7669,
189
  "step": 150
190
  },
191
  {
192
  "epoch": 10.97,
193
  "eval_accuracy": 0.7121951219512195,
194
+ "eval_loss": 0.6976904273033142,
195
+ "eval_runtime": 3.4718,
196
+ "eval_samples_per_second": 59.047,
197
+ "eval_steps_per_second": 2.016,
198
  "step": 159
199
  },
200
  {
201
  "epoch": 11.03,
202
+ "learning_rate": 3.0158730158730158e-05,
203
+ "loss": 0.7961,
204
  "step": 160
205
  },
206
  {
207
  "epoch": 11.72,
208
+ "learning_rate": 2.857142857142857e-05,
209
+ "loss": 0.7478,
210
  "step": 170
211
  },
212
  {
213
  "epoch": 12.0,
214
+ "eval_accuracy": 0.7121951219512195,
215
+ "eval_loss": 0.7042645812034607,
216
+ "eval_runtime": 3.4485,
217
+ "eval_samples_per_second": 59.447,
218
+ "eval_steps_per_second": 2.03,
219
  "step": 174
220
  },
221
  {
222
  "epoch": 12.41,
223
+ "learning_rate": 2.6984126984126984e-05,
224
+ "loss": 0.766,
225
  "step": 180
226
  },
227
  {
228
  "epoch": 12.97,
229
+ "eval_accuracy": 0.6585365853658537,
230
+ "eval_loss": 0.7778439521789551,
231
+ "eval_runtime": 3.6047,
232
+ "eval_samples_per_second": 56.87,
233
+ "eval_steps_per_second": 1.942,
234
  "step": 188
235
  },
236
  {
237
  "epoch": 13.1,
238
+ "learning_rate": 2.5396825396825397e-05,
239
+ "loss": 0.7691,
240
  "step": 190
241
  },
242
  {
243
  "epoch": 13.79,
244
+ "learning_rate": 2.380952380952381e-05,
245
+ "loss": 0.7322,
246
  "step": 200
247
  },
248
  {
249
  "epoch": 14.0,
250
+ "eval_accuracy": 0.6780487804878049,
251
+ "eval_loss": 0.7503620386123657,
252
+ "eval_runtime": 3.4523,
253
+ "eval_samples_per_second": 59.381,
254
+ "eval_steps_per_second": 2.028,
255
  "step": 203
256
  },
257
  {
258
  "epoch": 14.48,
259
+ "learning_rate": 2.2222222222222223e-05,
260
+ "loss": 0.7242,
261
  "step": 210
262
  },
263
  {
264
+ "epoch": 14.97,
265
+ "eval_accuracy": 0.6829268292682927,
266
+ "eval_loss": 0.7290918827056885,
267
+ "eval_runtime": 3.5373,
268
+ "eval_samples_per_second": 57.954,
269
+ "eval_steps_per_second": 1.979,
270
+ "step": 217
271
+ },
272
+ {
273
+ "epoch": 15.17,
274
+ "learning_rate": 2.0634920634920636e-05,
275
+ "loss": 0.7172,
276
+ "step": 220
277
+ },
278
+ {
279
+ "epoch": 15.86,
280
+ "learning_rate": 1.9047619047619046e-05,
281
+ "loss": 0.7554,
282
+ "step": 230
283
+ },
284
+ {
285
+ "epoch": 16.0,
286
+ "eval_accuracy": 0.6634146341463415,
287
+ "eval_loss": 0.7694374918937683,
288
+ "eval_runtime": 3.4718,
289
+ "eval_samples_per_second": 59.047,
290
+ "eval_steps_per_second": 2.016,
291
+ "step": 232
292
+ },
293
+ {
294
+ "epoch": 16.55,
295
+ "learning_rate": 1.746031746031746e-05,
296
+ "loss": 0.7422,
297
+ "step": 240
298
+ },
299
+ {
300
+ "epoch": 16.97,
301
+ "eval_accuracy": 0.6829268292682927,
302
+ "eval_loss": 0.7568630576133728,
303
+ "eval_runtime": 3.5281,
304
+ "eval_samples_per_second": 58.104,
305
+ "eval_steps_per_second": 1.984,
306
+ "step": 246
307
+ },
308
+ {
309
+ "epoch": 17.24,
310
+ "learning_rate": 1.5873015873015872e-05,
311
+ "loss": 0.7324,
312
+ "step": 250
313
+ },
314
+ {
315
+ "epoch": 17.93,
316
+ "learning_rate": 1.4285714285714285e-05,
317
+ "loss": 0.7292,
318
+ "step": 260
319
+ },
320
+ {
321
+ "epoch": 18.0,
322
+ "eval_accuracy": 0.6780487804878049,
323
+ "eval_loss": 0.7389385104179382,
324
+ "eval_runtime": 3.4574,
325
+ "eval_samples_per_second": 59.292,
326
+ "eval_steps_per_second": 2.025,
327
+ "step": 261
328
+ },
329
+ {
330
+ "epoch": 18.62,
331
+ "learning_rate": 1.2698412698412699e-05,
332
+ "loss": 0.7354,
333
+ "step": 270
334
+ },
335
+ {
336
+ "epoch": 18.97,
337
+ "eval_accuracy": 0.7121951219512195,
338
+ "eval_loss": 0.668440580368042,
339
+ "eval_runtime": 3.6943,
340
+ "eval_samples_per_second": 55.491,
341
+ "eval_steps_per_second": 1.895,
342
+ "step": 275
343
+ },
344
+ {
345
+ "epoch": 19.31,
346
+ "learning_rate": 1.1111111111111112e-05,
347
+ "loss": 0.7274,
348
+ "step": 280
349
+ },
350
+ {
351
+ "epoch": 20.0,
352
+ "learning_rate": 9.523809523809523e-06,
353
+ "loss": 0.6847,
354
+ "step": 290
355
+ },
356
+ {
357
+ "epoch": 20.0,
358
+ "eval_accuracy": 0.7121951219512195,
359
+ "eval_loss": 0.6821430325508118,
360
+ "eval_runtime": 3.4768,
361
+ "eval_samples_per_second": 58.962,
362
+ "eval_steps_per_second": 2.013,
363
+ "step": 290
364
+ },
365
+ {
366
+ "epoch": 20.69,
367
+ "learning_rate": 7.936507936507936e-06,
368
+ "loss": 0.7231,
369
+ "step": 300
370
+ },
371
+ {
372
+ "epoch": 20.97,
373
+ "eval_accuracy": 0.7024390243902439,
374
+ "eval_loss": 0.6839069128036499,
375
+ "eval_runtime": 3.53,
376
+ "eval_samples_per_second": 58.074,
377
+ "eval_steps_per_second": 1.983,
378
+ "step": 304
379
+ },
380
+ {
381
+ "epoch": 21.38,
382
+ "learning_rate": 6.349206349206349e-06,
383
+ "loss": 0.6962,
384
+ "step": 310
385
+ },
386
+ {
387
+ "epoch": 22.0,
388
  "eval_accuracy": 0.6878048780487804,
389
+ "eval_loss": 0.6958089470863342,
390
+ "eval_runtime": 3.4679,
391
+ "eval_samples_per_second": 59.113,
392
+ "eval_steps_per_second": 2.018,
393
+ "step": 319
394
  },
395
  {
396
+ "epoch": 22.07,
397
+ "learning_rate": 4.7619047619047615e-06,
398
+ "loss": 0.6995,
399
+ "step": 320
400
+ },
401
+ {
402
+ "epoch": 22.76,
403
+ "learning_rate": 3.1746031746031746e-06,
404
+ "loss": 0.7079,
405
+ "step": 330
406
+ },
407
+ {
408
+ "epoch": 22.97,
409
+ "eval_accuracy": 0.6878048780487804,
410
+ "eval_loss": 0.7039469480514526,
411
+ "eval_runtime": 3.4654,
412
+ "eval_samples_per_second": 59.156,
413
+ "eval_steps_per_second": 2.02,
414
+ "step": 333
415
+ },
416
+ {
417
+ "epoch": 23.45,
418
+ "learning_rate": 1.5873015873015873e-06,
419
+ "loss": 0.7088,
420
+ "step": 340
421
+ },
422
+ {
423
+ "epoch": 24.0,
424
+ "eval_accuracy": 0.6878048780487804,
425
+ "eval_loss": 0.6974486112594604,
426
+ "eval_runtime": 4.2005,
427
+ "eval_samples_per_second": 48.803,
428
+ "eval_steps_per_second": 1.666,
429
+ "step": 348
430
+ },
431
+ {
432
+ "epoch": 24.14,
433
+ "learning_rate": 0.0,
434
+ "loss": 0.7106,
435
+ "step": 350
436
+ },
437
+ {
438
+ "epoch": 24.14,
439
+ "eval_accuracy": 0.6878048780487804,
440
+ "eval_loss": 0.6975364089012146,
441
+ "eval_runtime": 3.9957,
442
+ "eval_samples_per_second": 51.305,
443
+ "eval_steps_per_second": 1.752,
444
+ "step": 350
445
+ },
446
+ {
447
+ "epoch": 24.14,
448
+ "step": 350,
449
+ "total_flos": 1.444384721662378e+18,
450
+ "train_loss": 0.8573012270246233,
451
+ "train_runtime": 2334.0273,
452
+ "train_samples_per_second": 19.698,
453
+ "train_steps_per_second": 0.15
454
  }
455
  ],
456
  "logging_steps": 10,
457
+ "max_steps": 350,
458
  "num_input_tokens_seen": 0,
459
+ "num_train_epochs": 25,
460
  "save_steps": 500,
461
+ "total_flos": 1.444384721662378e+18,
462
  "train_batch_size": 32,
463
  "trial_name": null,
464
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:53217e0c5182d6e5b40d2b23665f7781a2c9b4eacf79515ea542107971a6ea22
3
  size 4792
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f2b62bbe9a6d50250e169300e55cdbb15de7e0a611ef74fc73949faedf4cc42
3
  size 4792