moock committed on
Commit
ed16a11
1 Parent(s): 900a336

End of training

Browse files
README.md CHANGED
@@ -22,7 +22,7 @@ model-index:
22
  metrics:
23
  - name: Accuracy
24
  type: accuracy
25
- value: 0.8154362416107382
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -32,21 +32,20 @@ should probably proofread and complete it, then remove this comment. -->
32
 
33
  This model is a fine-tuned version of [microsoft/swinv2-tiny-patch4-window8-256](https://huggingface.co/microsoft/swinv2-tiny-patch4-window8-256) on the imagefolder dataset.
34
  It achieves the following results on the evaluation set:
35
- - Loss: 0.6445
36
- - Accuracy: 0.8154
37
 
38
  ## Model description
39
 
40
- Predict Expansion Grade - Gardner Score from an embryo image
41
-
42
 
43
  ## Intended uses & limitations
44
 
45
- More information will be provided
46
 
47
  ## Training and evaluation data
48
 
49
- More information will be provided
50
 
51
  ## Training procedure
52
 
@@ -62,22 +61,27 @@ The following hyperparameters were used during training:
62
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
63
  - lr_scheduler_type: linear
64
  - lr_scheduler_warmup_ratio: 0.1
65
- - num_epochs: 10
66
 
67
  ### Training results
68
 
69
  | Training Loss | Epoch | Step | Validation Loss | Accuracy |
70
  |:-------------:|:-----:|:----:|:---------------:|:--------:|
71
- | 1.6002 | 0.97 | 14 | 1.4558 | 0.5024 |
72
- | 1.4093 | 2.0 | 29 | 1.2320 | 0.5024 |
73
- | 1.1068 | 2.97 | 43 | 1.0740 | 0.5951 |
74
- | 0.9988 | 4.0 | 58 | 0.9967 | 0.6049 |
75
- | 0.9099 | 4.97 | 72 | 0.9248 | 0.6 |
76
- | 0.8674 | 6.0 | 87 | 0.8766 | 0.6780 |
77
- | 0.8638 | 6.97 | 101 | 0.8656 | 0.6732 |
78
- | 0.833 | 8.0 | 116 | 0.8395 | 0.6732 |
79
- | 0.8211 | 8.97 | 130 | 0.8204 | 0.6927 |
80
- | 0.8236 | 9.66 | 140 | 0.8204 | 0.6780 |
 
 
 
 
 
81
 
82
 
83
  ### Framework versions
 
22
  metrics:
23
  - name: Accuracy
24
  type: accuracy
25
+ value: 0.8355704697986577
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
32
 
33
  This model is a fine-tuned version of [microsoft/swinv2-tiny-patch4-window8-256](https://huggingface.co/microsoft/swinv2-tiny-patch4-window8-256) on the imagefolder dataset.
34
  It achieves the following results on the evaluation set:
35
+ - Loss: 0.5500
36
+ - Accuracy: 0.8356
37
 
38
  ## Model description
39
 
40
+ More information needed
 
41
 
42
  ## Intended uses & limitations
43
 
44
+ More information needed
45
 
46
  ## Training and evaluation data
47
 
48
+ More information needed
49
 
50
  ## Training procedure
51
 
 
61
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
62
  - lr_scheduler_type: linear
63
  - lr_scheduler_warmup_ratio: 0.1
64
+ - num_epochs: 15
65
 
66
  ### Training results
67
 
68
  | Training Loss | Epoch | Step | Validation Loss | Accuracy |
69
  |:-------------:|:-----:|:----:|:---------------:|:--------:|
70
+ | 1.6043 | 0.97 | 14 | 1.5288 | 0.5415 |
71
+ | 1.4967 | 2.0 | 29 | 1.1719 | 0.5415 |
72
+ | 1.1276 | 2.97 | 43 | 1.0525 | 0.5463 |
73
+ | 1.0796 | 4.0 | 58 | 0.9086 | 0.6537 |
74
+ | 0.9387 | 4.97 | 72 | 0.8500 | 0.6439 |
75
+ | 0.9232 | 6.0 | 87 | 0.8190 | 0.6732 |
76
+ | 0.8456 | 6.97 | 101 | 0.8042 | 0.6878 |
77
+ | 0.8348 | 8.0 | 116 | 0.7770 | 0.6927 |
78
+ | 0.8057 | 8.97 | 130 | 0.7457 | 0.7073 |
79
+ | 0.8033 | 10.0 | 145 | 0.7353 | 0.7024 |
80
+ | 0.7822 | 10.97 | 159 | 0.7166 | 0.7122 |
81
+ | 0.7594 | 12.0 | 174 | 0.7188 | 0.7171 |
82
+ | 0.7777 | 12.97 | 188 | 0.7086 | 0.7171 |
83
+ | 0.7445 | 14.0 | 203 | 0.7139 | 0.6878 |
84
+ | 0.7513 | 14.48 | 210 | 0.7139 | 0.6878 |
85
 
86
 
87
  ### Framework versions
all_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
- "epoch": 9.66,
3
- "eval_accuracy": 0.8154362416107382,
4
- "eval_loss": 0.6445475816726685,
5
- "eval_runtime": 10.3295,
6
- "eval_samples_per_second": 28.849,
7
- "eval_steps_per_second": 0.968,
8
- "train_loss": 1.0145302023206437,
9
- "train_runtime": 896.6945,
10
- "train_samples_per_second": 20.509,
11
- "train_steps_per_second": 0.156
12
  }
 
1
  {
2
+ "epoch": 14.48,
3
+ "eval_accuracy": 0.8355704697986577,
4
+ "eval_loss": 0.5500471591949463,
5
+ "eval_runtime": 5.0985,
6
+ "eval_samples_per_second": 58.448,
7
+ "eval_steps_per_second": 1.961,
8
+ "train_loss": 0.9456698463076637,
9
+ "train_runtime": 1102.737,
10
+ "train_samples_per_second": 25.015,
11
+ "train_steps_per_second": 0.19
12
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 9.66,
3
- "eval_accuracy": 0.8154362416107382,
4
- "eval_loss": 0.6445475816726685,
5
- "eval_runtime": 10.3295,
6
- "eval_samples_per_second": 28.849,
7
- "eval_steps_per_second": 0.968
8
  }
 
1
  {
2
+ "epoch": 14.48,
3
+ "eval_accuracy": 0.8355704697986577,
4
+ "eval_loss": 0.5500471591949463,
5
+ "eval_runtime": 5.0985,
6
+ "eval_samples_per_second": 58.448,
7
+ "eval_steps_per_second": 1.961
8
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8780f92caaadaf679a8727782284d64e7aa4f2cb477acc21b6624ba44457c5be
3
  size 110359372
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ba5d11712824f6d4f6f330378764fd34aae7206d7f052e681a25944638ca39e
3
  size 110359372
runs/Dec27_20-56-20_MacBook-Pro-de-Max-2.local/events.out.tfevents.1703707007.MacBook-Pro-de-Max-2.local.31343.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2310502586d42fb6eca3f41f5d1165c1cf835316b0b58c9e620aa3b3358ba1a7
3
+ size 8608
runs/Dec27_21-09-51_MacBook-Pro-de-Max-2.local/events.out.tfevents.1703707795.MacBook-Pro-de-Max-2.local.31343.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ac2ebe43936c7965468a39b4d27f7f686b001c9898c6e4072e421b9a4e811d6
3
+ size 37023
runs/Dec27_21-09-51_MacBook-Pro-de-Max-2.local/events.out.tfevents.1703719586.MacBook-Pro-de-Max-2.local.31343.4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f24a62940b383067cccbd28d7f5ca0dc7266a93ef8b282cab285fca896e7984a
3
+ size 734
runs/Dec28_00-27-59_MacBook-Pro-de-Max-2.local/events.out.tfevents.1703719713.MacBook-Pro-de-Max-2.local.31343.5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f84777390ee9fea3752cbbbdd0693698a22640b4c752e388932c01714ee1cb37
3
+ size 5649
runs/Dec28_00-27-59_MacBook-Pro-de-Max-2.local/events.out.tfevents.1703719811.MacBook-Pro-de-Max-2.local.31343.6 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6acbd6794a21eeb8a1575534f1312b0c1a0e2c943f0c6d8544908701380001c9
3
+ size 722
runs/Dec28_00-31-35_MacBook-Pro-de-Max-2.local/events.out.tfevents.1703719905.MacBook-Pro-de-Max-2.local.31343.7 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d561530c489cf72446ac0cf1c241be23dfc6a96eab829500caeadcb8298e589
3
+ size 13243
runs/Dec28_00-31-35_MacBook-Pro-de-Max-2.local/events.out.tfevents.1703721019.MacBook-Pro-de-Max-2.local.31343.8 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c2d78ab3bbe5949eb1507ea7576d7988c7963f41c07a875debbb2d0f2a01516
3
+ size 734
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 9.66,
3
- "train_loss": 1.0145302023206437,
4
- "train_runtime": 896.6945,
5
- "train_samples_per_second": 20.509,
6
- "train_steps_per_second": 0.156
7
  }
 
1
  {
2
+ "epoch": 14.48,
3
+ "train_loss": 0.9456698463076637,
4
+ "train_runtime": 1102.737,
5
+ "train_samples_per_second": 25.015,
6
+ "train_steps_per_second": 0.19
7
  }
trainer_state.json CHANGED
@@ -1,203 +1,290 @@
1
  {
2
- "best_metric": 0.6926829268292682,
3
- "best_model_checkpoint": "swinv2-tiny-patch4-window8-256-finetuned-gardner-exp-max/checkpoint-130",
4
- "epoch": 9.655172413793103,
5
  "eval_steps": 500,
6
- "global_step": 140,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.69,
13
- "learning_rate": 3.571428571428572e-05,
14
- "loss": 1.6002,
15
  "step": 10
16
  },
17
  {
18
  "epoch": 0.97,
19
- "eval_accuracy": 0.5024390243902439,
20
- "eval_loss": 1.4557533264160156,
21
- "eval_runtime": 10.833,
22
- "eval_samples_per_second": 18.924,
23
- "eval_steps_per_second": 0.646,
24
  "step": 14
25
  },
26
  {
27
  "epoch": 1.38,
28
  "learning_rate": 4.761904761904762e-05,
29
- "loss": 1.4093,
30
  "step": 20
31
  },
32
  {
33
  "epoch": 2.0,
34
- "eval_accuracy": 0.5024390243902439,
35
- "eval_loss": 1.2319719791412354,
36
- "eval_runtime": 4.0581,
37
- "eval_samples_per_second": 50.516,
38
- "eval_steps_per_second": 1.725,
39
  "step": 29
40
  },
41
  {
42
  "epoch": 2.07,
43
- "learning_rate": 4.3650793650793655e-05,
44
- "loss": 1.232,
45
  "step": 30
46
  },
47
  {
48
  "epoch": 2.76,
49
- "learning_rate": 3.968253968253968e-05,
50
- "loss": 1.1068,
51
  "step": 40
52
  },
53
  {
54
  "epoch": 2.97,
55
- "eval_accuracy": 0.5951219512195122,
56
- "eval_loss": 1.0739854574203491,
57
- "eval_runtime": 4.0516,
58
- "eval_samples_per_second": 50.598,
59
- "eval_steps_per_second": 1.728,
60
  "step": 43
61
  },
62
  {
63
  "epoch": 3.45,
64
- "learning_rate": 3.571428571428572e-05,
65
- "loss": 0.9988,
66
  "step": 50
67
  },
68
  {
69
  "epoch": 4.0,
70
- "eval_accuracy": 0.6048780487804878,
71
- "eval_loss": 0.9966912865638733,
72
- "eval_runtime": 3.5184,
73
- "eval_samples_per_second": 58.266,
74
- "eval_steps_per_second": 1.99,
75
  "step": 58
76
  },
77
  {
78
  "epoch": 4.14,
79
- "learning_rate": 3.1746031746031745e-05,
80
- "loss": 1.0171,
81
  "step": 60
82
  },
83
  {
84
  "epoch": 4.83,
85
- "learning_rate": 2.777777777777778e-05,
86
- "loss": 0.9099,
87
  "step": 70
88
  },
89
  {
90
  "epoch": 4.97,
91
- "eval_accuracy": 0.6,
92
- "eval_loss": 0.9247760772705078,
93
- "eval_runtime": 3.5711,
94
- "eval_samples_per_second": 57.405,
95
- "eval_steps_per_second": 1.96,
96
  "step": 72
97
  },
98
  {
99
  "epoch": 5.52,
100
- "learning_rate": 2.380952380952381e-05,
101
- "loss": 0.8674,
102
  "step": 80
103
  },
104
  {
105
  "epoch": 6.0,
106
- "eval_accuracy": 0.6780487804878049,
107
- "eval_loss": 0.8766470551490784,
108
- "eval_runtime": 3.5697,
109
- "eval_samples_per_second": 57.428,
110
- "eval_steps_per_second": 1.961,
111
  "step": 87
112
  },
113
  {
114
  "epoch": 6.21,
115
- "learning_rate": 1.984126984126984e-05,
116
- "loss": 0.8807,
117
  "step": 90
118
  },
119
  {
120
  "epoch": 6.9,
121
- "learning_rate": 1.5873015873015872e-05,
122
- "loss": 0.8638,
123
  "step": 100
124
  },
125
  {
126
  "epoch": 6.97,
127
- "eval_accuracy": 0.6731707317073171,
128
- "eval_loss": 0.865567684173584,
129
- "eval_runtime": 3.828,
130
- "eval_samples_per_second": 53.553,
131
- "eval_steps_per_second": 1.829,
132
  "step": 101
133
  },
134
  {
135
  "epoch": 7.59,
136
- "learning_rate": 1.1904761904761905e-05,
137
- "loss": 0.833,
138
  "step": 110
139
  },
140
  {
141
  "epoch": 8.0,
142
- "eval_accuracy": 0.6731707317073171,
143
- "eval_loss": 0.8395170569419861,
144
- "eval_runtime": 3.5828,
145
- "eval_samples_per_second": 57.218,
146
- "eval_steps_per_second": 1.954,
147
  "step": 116
148
  },
149
  {
150
  "epoch": 8.28,
151
- "learning_rate": 7.936507936507936e-06,
152
- "loss": 0.8396,
153
  "step": 120
154
  },
155
  {
156
  "epoch": 8.97,
157
- "learning_rate": 3.968253968253968e-06,
158
- "loss": 0.8211,
159
  "step": 130
160
  },
161
  {
162
  "epoch": 8.97,
163
- "eval_accuracy": 0.6926829268292682,
164
- "eval_loss": 0.8203723430633545,
165
- "eval_runtime": 3.633,
166
- "eval_samples_per_second": 56.427,
167
- "eval_steps_per_second": 1.927,
168
  "step": 130
169
  },
170
  {
171
  "epoch": 9.66,
172
- "learning_rate": 0.0,
173
- "loss": 0.8236,
174
  "step": 140
175
  },
176
  {
177
- "epoch": 9.66,
178
- "eval_accuracy": 0.6780487804878049,
179
- "eval_loss": 0.8203750848770142,
180
- "eval_runtime": 3.5738,
181
- "eval_samples_per_second": 57.361,
182
- "eval_steps_per_second": 1.959,
183
- "step": 140
184
  },
185
  {
186
- "epoch": 9.66,
187
- "step": 140,
188
- "total_flos": 5.78085766574506e+17,
189
- "train_loss": 1.0145302023206437,
190
- "train_runtime": 896.6945,
191
- "train_samples_per_second": 20.509,
192
- "train_steps_per_second": 0.156
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
193
  }
194
  ],
195
  "logging_steps": 10,
196
- "max_steps": 140,
197
  "num_input_tokens_seen": 0,
198
- "num_train_epochs": 10,
199
  "save_steps": 500,
200
- "total_flos": 5.78085766574506e+17,
201
  "train_batch_size": 32,
202
  "trial_name": null,
203
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.7170731707317073,
3
+ "best_model_checkpoint": "swinv2-tiny-patch4-window8-256-finetuned-gardner-exp-max/checkpoint-174",
4
+ "epoch": 14.482758620689655,
5
  "eval_steps": 500,
6
+ "global_step": 210,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.69,
13
+ "learning_rate": 2.380952380952381e-05,
14
+ "loss": 1.6043,
15
  "step": 10
16
  },
17
  {
18
  "epoch": 0.97,
19
+ "eval_accuracy": 0.5414634146341464,
20
+ "eval_loss": 1.5287535190582275,
21
+ "eval_runtime": 3.5003,
22
+ "eval_samples_per_second": 58.567,
23
+ "eval_steps_per_second": 2.0,
24
  "step": 14
25
  },
26
  {
27
  "epoch": 1.38,
28
  "learning_rate": 4.761904761904762e-05,
29
+ "loss": 1.4967,
30
  "step": 20
31
  },
32
  {
33
  "epoch": 2.0,
34
+ "eval_accuracy": 0.5414634146341464,
35
+ "eval_loss": 1.1718776226043701,
36
+ "eval_runtime": 3.523,
37
+ "eval_samples_per_second": 58.189,
38
+ "eval_steps_per_second": 1.987,
39
  "step": 29
40
  },
41
  {
42
  "epoch": 2.07,
43
+ "learning_rate": 4.761904761904762e-05,
44
+ "loss": 1.2615,
45
  "step": 30
46
  },
47
  {
48
  "epoch": 2.76,
49
+ "learning_rate": 4.4973544973544974e-05,
50
+ "loss": 1.1276,
51
  "step": 40
52
  },
53
  {
54
  "epoch": 2.97,
55
+ "eval_accuracy": 0.5463414634146342,
56
+ "eval_loss": 1.052482008934021,
57
+ "eval_runtime": 3.482,
58
+ "eval_samples_per_second": 58.874,
59
+ "eval_steps_per_second": 2.01,
60
  "step": 43
61
  },
62
  {
63
  "epoch": 3.45,
64
+ "learning_rate": 4.232804232804233e-05,
65
+ "loss": 1.0796,
66
  "step": 50
67
  },
68
  {
69
  "epoch": 4.0,
70
+ "eval_accuracy": 0.6536585365853659,
71
+ "eval_loss": 0.9085918664932251,
72
+ "eval_runtime": 3.5324,
73
+ "eval_samples_per_second": 58.034,
74
+ "eval_steps_per_second": 1.982,
75
  "step": 58
76
  },
77
  {
78
  "epoch": 4.14,
79
+ "learning_rate": 3.968253968253968e-05,
80
+ "loss": 1.0019,
81
  "step": 60
82
  },
83
  {
84
  "epoch": 4.83,
85
+ "learning_rate": 3.7037037037037037e-05,
86
+ "loss": 0.9387,
87
  "step": 70
88
  },
89
  {
90
  "epoch": 4.97,
91
+ "eval_accuracy": 0.6439024390243903,
92
+ "eval_loss": 0.8500024676322937,
93
+ "eval_runtime": 3.5195,
94
+ "eval_samples_per_second": 58.246,
95
+ "eval_steps_per_second": 1.989,
96
  "step": 72
97
  },
98
  {
99
  "epoch": 5.52,
100
+ "learning_rate": 3.439153439153439e-05,
101
+ "loss": 0.9232,
102
  "step": 80
103
  },
104
  {
105
  "epoch": 6.0,
106
+ "eval_accuracy": 0.6731707317073171,
107
+ "eval_loss": 0.8189888000488281,
108
+ "eval_runtime": 3.532,
109
+ "eval_samples_per_second": 58.041,
110
+ "eval_steps_per_second": 1.982,
111
  "step": 87
112
  },
113
  {
114
  "epoch": 6.21,
115
+ "learning_rate": 3.1746031746031745e-05,
116
+ "loss": 0.9018,
117
  "step": 90
118
  },
119
  {
120
  "epoch": 6.9,
121
+ "learning_rate": 2.91005291005291e-05,
122
+ "loss": 0.8456,
123
  "step": 100
124
  },
125
  {
126
  "epoch": 6.97,
127
+ "eval_accuracy": 0.6878048780487804,
128
+ "eval_loss": 0.8041830062866211,
129
+ "eval_runtime": 3.4986,
130
+ "eval_samples_per_second": 58.594,
131
+ "eval_steps_per_second": 2.001,
132
  "step": 101
133
  },
134
  {
135
  "epoch": 7.59,
136
+ "learning_rate": 2.6455026455026456e-05,
137
+ "loss": 0.8348,
138
  "step": 110
139
  },
140
  {
141
  "epoch": 8.0,
142
+ "eval_accuracy": 0.6926829268292682,
143
+ "eval_loss": 0.7769930362701416,
144
+ "eval_runtime": 3.5012,
145
+ "eval_samples_per_second": 58.552,
146
+ "eval_steps_per_second": 1.999,
147
  "step": 116
148
  },
149
  {
150
  "epoch": 8.28,
151
+ "learning_rate": 2.380952380952381e-05,
152
+ "loss": 0.8287,
153
  "step": 120
154
  },
155
  {
156
  "epoch": 8.97,
157
+ "learning_rate": 2.1164021164021164e-05,
158
+ "loss": 0.8057,
159
  "step": 130
160
  },
161
  {
162
  "epoch": 8.97,
163
+ "eval_accuracy": 0.7073170731707317,
164
+ "eval_loss": 0.7456830143928528,
165
+ "eval_runtime": 3.4827,
166
+ "eval_samples_per_second": 58.862,
167
+ "eval_steps_per_second": 2.01,
168
  "step": 130
169
  },
170
  {
171
  "epoch": 9.66,
172
+ "learning_rate": 1.8518518518518518e-05,
173
+ "loss": 0.8033,
174
  "step": 140
175
  },
176
  {
177
+ "epoch": 10.0,
178
+ "eval_accuracy": 0.7024390243902439,
179
+ "eval_loss": 0.7352668642997742,
180
+ "eval_runtime": 3.5384,
181
+ "eval_samples_per_second": 57.935,
182
+ "eval_steps_per_second": 1.978,
183
+ "step": 145
184
  },
185
  {
186
+ "epoch": 10.34,
187
+ "learning_rate": 1.5873015873015872e-05,
188
+ "loss": 0.7822,
189
+ "step": 150
190
+ },
191
+ {
192
+ "epoch": 10.97,
193
+ "eval_accuracy": 0.7121951219512195,
194
+ "eval_loss": 0.7165755033493042,
195
+ "eval_runtime": 3.4957,
196
+ "eval_samples_per_second": 58.643,
197
+ "eval_steps_per_second": 2.002,
198
+ "step": 159
199
+ },
200
+ {
201
+ "epoch": 11.03,
202
+ "learning_rate": 1.3227513227513228e-05,
203
+ "loss": 0.8041,
204
+ "step": 160
205
+ },
206
+ {
207
+ "epoch": 11.72,
208
+ "learning_rate": 1.0582010582010582e-05,
209
+ "loss": 0.7594,
210
+ "step": 170
211
+ },
212
+ {
213
+ "epoch": 12.0,
214
+ "eval_accuracy": 0.7170731707317073,
215
+ "eval_loss": 0.718829333782196,
216
+ "eval_runtime": 3.5638,
217
+ "eval_samples_per_second": 57.522,
218
+ "eval_steps_per_second": 1.964,
219
+ "step": 174
220
+ },
221
+ {
222
+ "epoch": 12.41,
223
+ "learning_rate": 7.936507936507936e-06,
224
+ "loss": 0.7777,
225
+ "step": 180
226
+ },
227
+ {
228
+ "epoch": 12.97,
229
+ "eval_accuracy": 0.7170731707317073,
230
+ "eval_loss": 0.7086274027824402,
231
+ "eval_runtime": 3.4886,
232
+ "eval_samples_per_second": 58.762,
233
+ "eval_steps_per_second": 2.007,
234
+ "step": 188
235
+ },
236
+ {
237
+ "epoch": 13.1,
238
+ "learning_rate": 5.291005291005291e-06,
239
+ "loss": 0.7863,
240
+ "step": 190
241
+ },
242
+ {
243
+ "epoch": 13.79,
244
+ "learning_rate": 2.6455026455026455e-06,
245
+ "loss": 0.7445,
246
+ "step": 200
247
+ },
248
+ {
249
+ "epoch": 14.0,
250
+ "eval_accuracy": 0.6878048780487804,
251
+ "eval_loss": 0.7139347791671753,
252
+ "eval_runtime": 3.4798,
253
+ "eval_samples_per_second": 58.912,
254
+ "eval_steps_per_second": 2.012,
255
+ "step": 203
256
+ },
257
+ {
258
+ "epoch": 14.48,
259
+ "learning_rate": 0.0,
260
+ "loss": 0.7513,
261
+ "step": 210
262
+ },
263
+ {
264
+ "epoch": 14.48,
265
+ "eval_accuracy": 0.6878048780487804,
266
+ "eval_loss": 0.7138883471488953,
267
+ "eval_runtime": 3.48,
268
+ "eval_samples_per_second": 58.908,
269
+ "eval_steps_per_second": 2.012,
270
+ "step": 210
271
+ },
272
+ {
273
+ "epoch": 14.48,
274
+ "step": 210,
275
+ "total_flos": 8.6685208493713e+17,
276
+ "train_loss": 0.9456698463076637,
277
+ "train_runtime": 1102.737,
278
+ "train_samples_per_second": 25.015,
279
+ "train_steps_per_second": 0.19
280
  }
281
  ],
282
  "logging_steps": 10,
283
+ "max_steps": 210,
284
  "num_input_tokens_seen": 0,
285
+ "num_train_epochs": 15,
286
  "save_steps": 500,
287
+ "total_flos": 8.6685208493713e+17,
288
  "train_batch_size": 32,
289
  "trial_name": null,
290
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b1577050422d312cadf11da4407c557f68886a726d97b4971bf7def3ccd26394
3
  size 4792
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53217e0c5182d6e5b40d2b23665f7781a2c9b4eacf79515ea542107971a6ea22
3
  size 4792