End of training
Browse files- README.md +22 -18
- all_results.json +10 -10
- eval_results.json +6 -6
- model.safetensors +1 -1
- runs/Dec27_20-56-20_MacBook-Pro-de-Max-2.local/events.out.tfevents.1703707007.MacBook-Pro-de-Max-2.local.31343.2 +3 -0
- runs/Dec27_21-09-51_MacBook-Pro-de-Max-2.local/events.out.tfevents.1703707795.MacBook-Pro-de-Max-2.local.31343.3 +3 -0
- runs/Dec27_21-09-51_MacBook-Pro-de-Max-2.local/events.out.tfevents.1703719586.MacBook-Pro-de-Max-2.local.31343.4 +3 -0
- runs/Dec28_00-27-59_MacBook-Pro-de-Max-2.local/events.out.tfevents.1703719713.MacBook-Pro-de-Max-2.local.31343.5 +3 -0
- runs/Dec28_00-27-59_MacBook-Pro-de-Max-2.local/events.out.tfevents.1703719811.MacBook-Pro-de-Max-2.local.31343.6 +3 -0
- runs/Dec28_00-31-35_MacBook-Pro-de-Max-2.local/events.out.tfevents.1703719905.MacBook-Pro-de-Max-2.local.31343.7 +3 -0
- runs/Dec28_00-31-35_MacBook-Pro-de-Max-2.local/events.out.tfevents.1703721019.MacBook-Pro-de-Max-2.local.31343.8 +3 -0
- train_results.json +5 -5
- trainer_state.json +180 -93
- training_args.bin +1 -1
README.md
CHANGED
@@ -22,7 +22,7 @@ model-index:
|
|
22 |
metrics:
|
23 |
- name: Accuracy
|
24 |
type: accuracy
|
25 |
-
value: 0.
|
26 |
---
|
27 |
|
28 |
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
@@ -32,21 +32,20 @@ should probably proofread and complete it, then remove this comment. -->
|
|
32 |
|
33 |
This model is a fine-tuned version of [microsoft/swinv2-tiny-patch4-window8-256](https://huggingface.co/microsoft/swinv2-tiny-patch4-window8-256) on the imagefolder dataset.
|
34 |
It achieves the following results on the evaluation set:
|
35 |
-
- Loss: 0.
|
36 |
-
- Accuracy: 0.
|
37 |
|
38 |
## Model description
|
39 |
|
40 |
-
|
41 |
-
|
42 |
|
43 |
## Intended uses & limitations
|
44 |
|
45 |
-
More information
|
46 |
|
47 |
## Training and evaluation data
|
48 |
|
49 |
-
More information
|
50 |
|
51 |
## Training procedure
|
52 |
|
@@ -62,22 +61,27 @@ The following hyperparameters were used during training:
|
|
62 |
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
63 |
- lr_scheduler_type: linear
|
64 |
- lr_scheduler_warmup_ratio: 0.1
|
65 |
-
- num_epochs:
|
66 |
|
67 |
### Training results
|
68 |
|
69 |
| Training Loss | Epoch | Step | Validation Loss | Accuracy |
|
70 |
|:-------------:|:-----:|:----:|:---------------:|:--------:|
|
71 |
-
| 1.
|
72 |
-
| 1.
|
73 |
-
| 1.
|
74 |
-
|
|
75 |
-
| 0.
|
76 |
-
| 0.
|
77 |
-
| 0.
|
78 |
-
| 0.
|
79 |
-
| 0.
|
80 |
-
| 0.
|
|
|
|
|
|
|
|
|
|
|
81 |
|
82 |
|
83 |
### Framework versions
|
|
|
22 |
metrics:
|
23 |
- name: Accuracy
|
24 |
type: accuracy
|
25 |
+
value: 0.8355704697986577
|
26 |
---
|
27 |
|
28 |
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
|
|
32 |
|
33 |
This model is a fine-tuned version of [microsoft/swinv2-tiny-patch4-window8-256](https://huggingface.co/microsoft/swinv2-tiny-patch4-window8-256) on the imagefolder dataset.
|
34 |
It achieves the following results on the evaluation set:
|
35 |
+
- Loss: 0.5500
|
36 |
+
- Accuracy: 0.8356
|
37 |
|
38 |
## Model description
|
39 |
|
40 |
+
More information needed
|
|
|
41 |
|
42 |
## Intended uses & limitations
|
43 |
|
44 |
+
More information needed
|
45 |
|
46 |
## Training and evaluation data
|
47 |
|
48 |
+
More information needed
|
49 |
|
50 |
## Training procedure
|
51 |
|
|
|
61 |
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
62 |
- lr_scheduler_type: linear
|
63 |
- lr_scheduler_warmup_ratio: 0.1
|
64 |
+
- num_epochs: 15
|
65 |
|
66 |
### Training results
|
67 |
|
68 |
| Training Loss | Epoch | Step | Validation Loss | Accuracy |
|
69 |
|:-------------:|:-----:|:----:|:---------------:|:--------:|
|
70 |
+
| 1.6043 | 0.97 | 14 | 1.5288 | 0.5415 |
|
71 |
+
| 1.4967 | 2.0 | 29 | 1.1719 | 0.5415 |
|
72 |
+
| 1.1276 | 2.97 | 43 | 1.0525 | 0.5463 |
|
73 |
+
| 1.0796 | 4.0 | 58 | 0.9086 | 0.6537 |
|
74 |
+
| 0.9387 | 4.97 | 72 | 0.8500 | 0.6439 |
|
75 |
+
| 0.9232 | 6.0 | 87 | 0.8190 | 0.6732 |
|
76 |
+
| 0.8456 | 6.97 | 101 | 0.8042 | 0.6878 |
|
77 |
+
| 0.8348 | 8.0 | 116 | 0.7770 | 0.6927 |
|
78 |
+
| 0.8057 | 8.97 | 130 | 0.7457 | 0.7073 |
|
79 |
+
| 0.8033 | 10.0 | 145 | 0.7353 | 0.7024 |
|
80 |
+
| 0.7822 | 10.97 | 159 | 0.7166 | 0.7122 |
|
81 |
+
| 0.7594 | 12.0 | 174 | 0.7188 | 0.7171 |
|
82 |
+
| 0.7777 | 12.97 | 188 | 0.7086 | 0.7171 |
|
83 |
+
| 0.7445 | 14.0 | 203 | 0.7139 | 0.6878 |
|
84 |
+
| 0.7513 | 14.48 | 210 | 0.7139 | 0.6878 |
|
85 |
|
86 |
|
87 |
### Framework versions
|
all_results.json
CHANGED
@@ -1,12 +1,12 @@
|
|
1 |
{
|
2 |
-
"epoch":
|
3 |
-
"eval_accuracy": 0.
|
4 |
-
"eval_loss": 0.
|
5 |
-
"eval_runtime":
|
6 |
-
"eval_samples_per_second":
|
7 |
-
"eval_steps_per_second":
|
8 |
-
"train_loss":
|
9 |
-
"train_runtime":
|
10 |
-
"train_samples_per_second":
|
11 |
-
"train_steps_per_second": 0.
|
12 |
}
|
|
|
1 |
{
|
2 |
+
"epoch": 14.48,
|
3 |
+
"eval_accuracy": 0.8355704697986577,
|
4 |
+
"eval_loss": 0.5500471591949463,
|
5 |
+
"eval_runtime": 5.0985,
|
6 |
+
"eval_samples_per_second": 58.448,
|
7 |
+
"eval_steps_per_second": 1.961,
|
8 |
+
"train_loss": 0.9456698463076637,
|
9 |
+
"train_runtime": 1102.737,
|
10 |
+
"train_samples_per_second": 25.015,
|
11 |
+
"train_steps_per_second": 0.19
|
12 |
}
|
eval_results.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
-
"epoch":
|
3 |
-
"eval_accuracy": 0.
|
4 |
-
"eval_loss": 0.
|
5 |
-
"eval_runtime":
|
6 |
-
"eval_samples_per_second":
|
7 |
-
"eval_steps_per_second":
|
8 |
}
|
|
|
1 |
{
|
2 |
+
"epoch": 14.48,
|
3 |
+
"eval_accuracy": 0.8355704697986577,
|
4 |
+
"eval_loss": 0.5500471591949463,
|
5 |
+
"eval_runtime": 5.0985,
|
6 |
+
"eval_samples_per_second": 58.448,
|
7 |
+
"eval_steps_per_second": 1.961
|
8 |
}
|
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 110359372
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0ba5d11712824f6d4f6f330378764fd34aae7206d7f052e681a25944638ca39e
|
3 |
size 110359372
|
runs/Dec27_20-56-20_MacBook-Pro-de-Max-2.local/events.out.tfevents.1703707007.MacBook-Pro-de-Max-2.local.31343.2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2310502586d42fb6eca3f41f5d1165c1cf835316b0b58c9e620aa3b3358ba1a7
|
3 |
+
size 8608
|
runs/Dec27_21-09-51_MacBook-Pro-de-Max-2.local/events.out.tfevents.1703707795.MacBook-Pro-de-Max-2.local.31343.3
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5ac2ebe43936c7965468a39b4d27f7f686b001c9898c6e4072e421b9a4e811d6
|
3 |
+
size 37023
|
runs/Dec27_21-09-51_MacBook-Pro-de-Max-2.local/events.out.tfevents.1703719586.MacBook-Pro-de-Max-2.local.31343.4
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f24a62940b383067cccbd28d7f5ca0dc7266a93ef8b282cab285fca896e7984a
|
3 |
+
size 734
|
runs/Dec28_00-27-59_MacBook-Pro-de-Max-2.local/events.out.tfevents.1703719713.MacBook-Pro-de-Max-2.local.31343.5
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f84777390ee9fea3752cbbbdd0693698a22640b4c752e388932c01714ee1cb37
|
3 |
+
size 5649
|
runs/Dec28_00-27-59_MacBook-Pro-de-Max-2.local/events.out.tfevents.1703719811.MacBook-Pro-de-Max-2.local.31343.6
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6acbd6794a21eeb8a1575534f1312b0c1a0e2c943f0c6d8544908701380001c9
|
3 |
+
size 722
|
runs/Dec28_00-31-35_MacBook-Pro-de-Max-2.local/events.out.tfevents.1703719905.MacBook-Pro-de-Max-2.local.31343.7
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4d561530c489cf72446ac0cf1c241be23dfc6a96eab829500caeadcb8298e589
|
3 |
+
size 13243
|
runs/Dec28_00-31-35_MacBook-Pro-de-Max-2.local/events.out.tfevents.1703721019.MacBook-Pro-de-Max-2.local.31343.8
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3c2d78ab3bbe5949eb1507ea7576d7988c7963f41c07a875debbb2d0f2a01516
|
3 |
+
size 734
|
train_results.json
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
{
|
2 |
-
"epoch":
|
3 |
-
"train_loss":
|
4 |
-
"train_runtime":
|
5 |
-
"train_samples_per_second":
|
6 |
-
"train_steps_per_second": 0.
|
7 |
}
|
|
|
1 |
{
|
2 |
+
"epoch": 14.48,
|
3 |
+
"train_loss": 0.9456698463076637,
|
4 |
+
"train_runtime": 1102.737,
|
5 |
+
"train_samples_per_second": 25.015,
|
6 |
+
"train_steps_per_second": 0.19
|
7 |
}
|
trainer_state.json
CHANGED
@@ -1,203 +1,290 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
-
"best_model_checkpoint": "swinv2-tiny-patch4-window8-256-finetuned-gardner-exp-max/checkpoint-
|
4 |
-
"epoch":
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 0.69,
|
13 |
-
"learning_rate":
|
14 |
-
"loss": 1.
|
15 |
"step": 10
|
16 |
},
|
17 |
{
|
18 |
"epoch": 0.97,
|
19 |
-
"eval_accuracy": 0.
|
20 |
-
"eval_loss": 1.
|
21 |
-
"eval_runtime":
|
22 |
-
"eval_samples_per_second":
|
23 |
-
"eval_steps_per_second": 0
|
24 |
"step": 14
|
25 |
},
|
26 |
{
|
27 |
"epoch": 1.38,
|
28 |
"learning_rate": 4.761904761904762e-05,
|
29 |
-
"loss": 1.
|
30 |
"step": 20
|
31 |
},
|
32 |
{
|
33 |
"epoch": 2.0,
|
34 |
-
"eval_accuracy": 0.
|
35 |
-
"eval_loss": 1.
|
36 |
-
"eval_runtime":
|
37 |
-
"eval_samples_per_second":
|
38 |
-
"eval_steps_per_second": 1.
|
39 |
"step": 29
|
40 |
},
|
41 |
{
|
42 |
"epoch": 2.07,
|
43 |
-
"learning_rate": 4.
|
44 |
-
"loss": 1.
|
45 |
"step": 30
|
46 |
},
|
47 |
{
|
48 |
"epoch": 2.76,
|
49 |
-
"learning_rate":
|
50 |
-
"loss": 1.
|
51 |
"step": 40
|
52 |
},
|
53 |
{
|
54 |
"epoch": 2.97,
|
55 |
-
"eval_accuracy": 0.
|
56 |
-
"eval_loss": 1.
|
57 |
-
"eval_runtime":
|
58 |
-
"eval_samples_per_second":
|
59 |
-
"eval_steps_per_second":
|
60 |
"step": 43
|
61 |
},
|
62 |
{
|
63 |
"epoch": 3.45,
|
64 |
-
"learning_rate":
|
65 |
-
"loss":
|
66 |
"step": 50
|
67 |
},
|
68 |
{
|
69 |
"epoch": 4.0,
|
70 |
-
"eval_accuracy": 0.
|
71 |
-
"eval_loss": 0.
|
72 |
-
"eval_runtime": 3.
|
73 |
-
"eval_samples_per_second": 58.
|
74 |
-
"eval_steps_per_second": 1.
|
75 |
"step": 58
|
76 |
},
|
77 |
{
|
78 |
"epoch": 4.14,
|
79 |
-
"learning_rate": 3.
|
80 |
-
"loss": 1.
|
81 |
"step": 60
|
82 |
},
|
83 |
{
|
84 |
"epoch": 4.83,
|
85 |
-
"learning_rate":
|
86 |
-
"loss": 0.
|
87 |
"step": 70
|
88 |
},
|
89 |
{
|
90 |
"epoch": 4.97,
|
91 |
-
"eval_accuracy": 0.
|
92 |
-
"eval_loss": 0.
|
93 |
-
"eval_runtime": 3.
|
94 |
-
"eval_samples_per_second":
|
95 |
-
"eval_steps_per_second": 1.
|
96 |
"step": 72
|
97 |
},
|
98 |
{
|
99 |
"epoch": 5.52,
|
100 |
-
"learning_rate":
|
101 |
-
"loss": 0.
|
102 |
"step": 80
|
103 |
},
|
104 |
{
|
105 |
"epoch": 6.0,
|
106 |
-
"eval_accuracy": 0.
|
107 |
-
"eval_loss": 0.
|
108 |
-
"eval_runtime": 3.
|
109 |
-
"eval_samples_per_second":
|
110 |
-
"eval_steps_per_second": 1.
|
111 |
"step": 87
|
112 |
},
|
113 |
{
|
114 |
"epoch": 6.21,
|
115 |
-
"learning_rate":
|
116 |
-
"loss": 0.
|
117 |
"step": 90
|
118 |
},
|
119 |
{
|
120 |
"epoch": 6.9,
|
121 |
-
"learning_rate":
|
122 |
-
"loss": 0.
|
123 |
"step": 100
|
124 |
},
|
125 |
{
|
126 |
"epoch": 6.97,
|
127 |
-
"eval_accuracy": 0.
|
128 |
-
"eval_loss": 0.
|
129 |
-
"eval_runtime": 3.
|
130 |
-
"eval_samples_per_second":
|
131 |
-
"eval_steps_per_second":
|
132 |
"step": 101
|
133 |
},
|
134 |
{
|
135 |
"epoch": 7.59,
|
136 |
-
"learning_rate":
|
137 |
-
"loss": 0.
|
138 |
"step": 110
|
139 |
},
|
140 |
{
|
141 |
"epoch": 8.0,
|
142 |
-
"eval_accuracy": 0.
|
143 |
-
"eval_loss": 0.
|
144 |
-
"eval_runtime": 3.
|
145 |
-
"eval_samples_per_second":
|
146 |
-
"eval_steps_per_second": 1.
|
147 |
"step": 116
|
148 |
},
|
149 |
{
|
150 |
"epoch": 8.28,
|
151 |
-
"learning_rate":
|
152 |
-
"loss": 0.
|
153 |
"step": 120
|
154 |
},
|
155 |
{
|
156 |
"epoch": 8.97,
|
157 |
-
"learning_rate":
|
158 |
-
"loss": 0.
|
159 |
"step": 130
|
160 |
},
|
161 |
{
|
162 |
"epoch": 8.97,
|
163 |
-
"eval_accuracy": 0.
|
164 |
-
"eval_loss": 0.
|
165 |
-
"eval_runtime": 3.
|
166 |
-
"eval_samples_per_second":
|
167 |
-
"eval_steps_per_second":
|
168 |
"step": 130
|
169 |
},
|
170 |
{
|
171 |
"epoch": 9.66,
|
172 |
-
"learning_rate":
|
173 |
-
"loss": 0.
|
174 |
"step": 140
|
175 |
},
|
176 |
{
|
177 |
-
"epoch":
|
178 |
-
"eval_accuracy": 0.
|
179 |
-
"eval_loss": 0.
|
180 |
-
"eval_runtime": 3.
|
181 |
-
"eval_samples_per_second": 57.
|
182 |
-
"eval_steps_per_second": 1.
|
183 |
-
"step":
|
184 |
},
|
185 |
{
|
186 |
-
"epoch":
|
187 |
-
"
|
188 |
-
"
|
189 |
-
"
|
190 |
-
|
191 |
-
|
192 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
193 |
}
|
194 |
],
|
195 |
"logging_steps": 10,
|
196 |
-
"max_steps":
|
197 |
"num_input_tokens_seen": 0,
|
198 |
-
"num_train_epochs":
|
199 |
"save_steps": 500,
|
200 |
-
"total_flos":
|
201 |
"train_batch_size": 32,
|
202 |
"trial_name": null,
|
203 |
"trial_params": null
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.7170731707317073,
|
3 |
+
"best_model_checkpoint": "swinv2-tiny-patch4-window8-256-finetuned-gardner-exp-max/checkpoint-174",
|
4 |
+
"epoch": 14.482758620689655,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 210,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 0.69,
|
13 |
+
"learning_rate": 2.380952380952381e-05,
|
14 |
+
"loss": 1.6043,
|
15 |
"step": 10
|
16 |
},
|
17 |
{
|
18 |
"epoch": 0.97,
|
19 |
+
"eval_accuracy": 0.5414634146341464,
|
20 |
+
"eval_loss": 1.5287535190582275,
|
21 |
+
"eval_runtime": 3.5003,
|
22 |
+
"eval_samples_per_second": 58.567,
|
23 |
+
"eval_steps_per_second": 2.0,
|
24 |
"step": 14
|
25 |
},
|
26 |
{
|
27 |
"epoch": 1.38,
|
28 |
"learning_rate": 4.761904761904762e-05,
|
29 |
+
"loss": 1.4967,
|
30 |
"step": 20
|
31 |
},
|
32 |
{
|
33 |
"epoch": 2.0,
|
34 |
+
"eval_accuracy": 0.5414634146341464,
|
35 |
+
"eval_loss": 1.1718776226043701,
|
36 |
+
"eval_runtime": 3.523,
|
37 |
+
"eval_samples_per_second": 58.189,
|
38 |
+
"eval_steps_per_second": 1.987,
|
39 |
"step": 29
|
40 |
},
|
41 |
{
|
42 |
"epoch": 2.07,
|
43 |
+
"learning_rate": 4.761904761904762e-05,
|
44 |
+
"loss": 1.2615,
|
45 |
"step": 30
|
46 |
},
|
47 |
{
|
48 |
"epoch": 2.76,
|
49 |
+
"learning_rate": 4.4973544973544974e-05,
|
50 |
+
"loss": 1.1276,
|
51 |
"step": 40
|
52 |
},
|
53 |
{
|
54 |
"epoch": 2.97,
|
55 |
+
"eval_accuracy": 0.5463414634146342,
|
56 |
+
"eval_loss": 1.052482008934021,
|
57 |
+
"eval_runtime": 3.482,
|
58 |
+
"eval_samples_per_second": 58.874,
|
59 |
+
"eval_steps_per_second": 2.01,
|
60 |
"step": 43
|
61 |
},
|
62 |
{
|
63 |
"epoch": 3.45,
|
64 |
+
"learning_rate": 4.232804232804233e-05,
|
65 |
+
"loss": 1.0796,
|
66 |
"step": 50
|
67 |
},
|
68 |
{
|
69 |
"epoch": 4.0,
|
70 |
+
"eval_accuracy": 0.6536585365853659,
|
71 |
+
"eval_loss": 0.9085918664932251,
|
72 |
+
"eval_runtime": 3.5324,
|
73 |
+
"eval_samples_per_second": 58.034,
|
74 |
+
"eval_steps_per_second": 1.982,
|
75 |
"step": 58
|
76 |
},
|
77 |
{
|
78 |
"epoch": 4.14,
|
79 |
+
"learning_rate": 3.968253968253968e-05,
|
80 |
+
"loss": 1.0019,
|
81 |
"step": 60
|
82 |
},
|
83 |
{
|
84 |
"epoch": 4.83,
|
85 |
+
"learning_rate": 3.7037037037037037e-05,
|
86 |
+
"loss": 0.9387,
|
87 |
"step": 70
|
88 |
},
|
89 |
{
|
90 |
"epoch": 4.97,
|
91 |
+
"eval_accuracy": 0.6439024390243903,
|
92 |
+
"eval_loss": 0.8500024676322937,
|
93 |
+
"eval_runtime": 3.5195,
|
94 |
+
"eval_samples_per_second": 58.246,
|
95 |
+
"eval_steps_per_second": 1.989,
|
96 |
"step": 72
|
97 |
},
|
98 |
{
|
99 |
"epoch": 5.52,
|
100 |
+
"learning_rate": 3.439153439153439e-05,
|
101 |
+
"loss": 0.9232,
|
102 |
"step": 80
|
103 |
},
|
104 |
{
|
105 |
"epoch": 6.0,
|
106 |
+
"eval_accuracy": 0.6731707317073171,
|
107 |
+
"eval_loss": 0.8189888000488281,
|
108 |
+
"eval_runtime": 3.532,
|
109 |
+
"eval_samples_per_second": 58.041,
|
110 |
+
"eval_steps_per_second": 1.982,
|
111 |
"step": 87
|
112 |
},
|
113 |
{
|
114 |
"epoch": 6.21,
|
115 |
+
"learning_rate": 3.1746031746031745e-05,
|
116 |
+
"loss": 0.9018,
|
117 |
"step": 90
|
118 |
},
|
119 |
{
|
120 |
"epoch": 6.9,
|
121 |
+
"learning_rate": 2.91005291005291e-05,
|
122 |
+
"loss": 0.8456,
|
123 |
"step": 100
|
124 |
},
|
125 |
{
|
126 |
"epoch": 6.97,
|
127 |
+
"eval_accuracy": 0.6878048780487804,
|
128 |
+
"eval_loss": 0.8041830062866211,
|
129 |
+
"eval_runtime": 3.4986,
|
130 |
+
"eval_samples_per_second": 58.594,
|
131 |
+
"eval_steps_per_second": 2.001,
|
132 |
"step": 101
|
133 |
},
|
134 |
{
|
135 |
"epoch": 7.59,
|
136 |
+
"learning_rate": 2.6455026455026456e-05,
|
137 |
+
"loss": 0.8348,
|
138 |
"step": 110
|
139 |
},
|
140 |
{
|
141 |
"epoch": 8.0,
|
142 |
+
"eval_accuracy": 0.6926829268292682,
|
143 |
+
"eval_loss": 0.7769930362701416,
|
144 |
+
"eval_runtime": 3.5012,
|
145 |
+
"eval_samples_per_second": 58.552,
|
146 |
+
"eval_steps_per_second": 1.999,
|
147 |
"step": 116
|
148 |
},
|
149 |
{
|
150 |
"epoch": 8.28,
|
151 |
+
"learning_rate": 2.380952380952381e-05,
|
152 |
+
"loss": 0.8287,
|
153 |
"step": 120
|
154 |
},
|
155 |
{
|
156 |
"epoch": 8.97,
|
157 |
+
"learning_rate": 2.1164021164021164e-05,
|
158 |
+
"loss": 0.8057,
|
159 |
"step": 130
|
160 |
},
|
161 |
{
|
162 |
"epoch": 8.97,
|
163 |
+
"eval_accuracy": 0.7073170731707317,
|
164 |
+
"eval_loss": 0.7456830143928528,
|
165 |
+
"eval_runtime": 3.4827,
|
166 |
+
"eval_samples_per_second": 58.862,
|
167 |
+
"eval_steps_per_second": 2.01,
|
168 |
"step": 130
|
169 |
},
|
170 |
{
|
171 |
"epoch": 9.66,
|
172 |
+
"learning_rate": 1.8518518518518518e-05,
|
173 |
+
"loss": 0.8033,
|
174 |
"step": 140
|
175 |
},
|
176 |
{
|
177 |
+
"epoch": 10.0,
|
178 |
+
"eval_accuracy": 0.7024390243902439,
|
179 |
+
"eval_loss": 0.7352668642997742,
|
180 |
+
"eval_runtime": 3.5384,
|
181 |
+
"eval_samples_per_second": 57.935,
|
182 |
+
"eval_steps_per_second": 1.978,
|
183 |
+
"step": 145
|
184 |
},
|
185 |
{
|
186 |
+
"epoch": 10.34,
|
187 |
+
"learning_rate": 1.5873015873015872e-05,
|
188 |
+
"loss": 0.7822,
|
189 |
+
"step": 150
|
190 |
+
},
|
191 |
+
{
|
192 |
+
"epoch": 10.97,
|
193 |
+
"eval_accuracy": 0.7121951219512195,
|
194 |
+
"eval_loss": 0.7165755033493042,
|
195 |
+
"eval_runtime": 3.4957,
|
196 |
+
"eval_samples_per_second": 58.643,
|
197 |
+
"eval_steps_per_second": 2.002,
|
198 |
+
"step": 159
|
199 |
+
},
|
200 |
+
{
|
201 |
+
"epoch": 11.03,
|
202 |
+
"learning_rate": 1.3227513227513228e-05,
|
203 |
+
"loss": 0.8041,
|
204 |
+
"step": 160
|
205 |
+
},
|
206 |
+
{
|
207 |
+
"epoch": 11.72,
|
208 |
+
"learning_rate": 1.0582010582010582e-05,
|
209 |
+
"loss": 0.7594,
|
210 |
+
"step": 170
|
211 |
+
},
|
212 |
+
{
|
213 |
+
"epoch": 12.0,
|
214 |
+
"eval_accuracy": 0.7170731707317073,
|
215 |
+
"eval_loss": 0.718829333782196,
|
216 |
+
"eval_runtime": 3.5638,
|
217 |
+
"eval_samples_per_second": 57.522,
|
218 |
+
"eval_steps_per_second": 1.964,
|
219 |
+
"step": 174
|
220 |
+
},
|
221 |
+
{
|
222 |
+
"epoch": 12.41,
|
223 |
+
"learning_rate": 7.936507936507936e-06,
|
224 |
+
"loss": 0.7777,
|
225 |
+
"step": 180
|
226 |
+
},
|
227 |
+
{
|
228 |
+
"epoch": 12.97,
|
229 |
+
"eval_accuracy": 0.7170731707317073,
|
230 |
+
"eval_loss": 0.7086274027824402,
|
231 |
+
"eval_runtime": 3.4886,
|
232 |
+
"eval_samples_per_second": 58.762,
|
233 |
+
"eval_steps_per_second": 2.007,
|
234 |
+
"step": 188
|
235 |
+
},
|
236 |
+
{
|
237 |
+
"epoch": 13.1,
|
238 |
+
"learning_rate": 5.291005291005291e-06,
|
239 |
+
"loss": 0.7863,
|
240 |
+
"step": 190
|
241 |
+
},
|
242 |
+
{
|
243 |
+
"epoch": 13.79,
|
244 |
+
"learning_rate": 2.6455026455026455e-06,
|
245 |
+
"loss": 0.7445,
|
246 |
+
"step": 200
|
247 |
+
},
|
248 |
+
{
|
249 |
+
"epoch": 14.0,
|
250 |
+
"eval_accuracy": 0.6878048780487804,
|
251 |
+
"eval_loss": 0.7139347791671753,
|
252 |
+
"eval_runtime": 3.4798,
|
253 |
+
"eval_samples_per_second": 58.912,
|
254 |
+
"eval_steps_per_second": 2.012,
|
255 |
+
"step": 203
|
256 |
+
},
|
257 |
+
{
|
258 |
+
"epoch": 14.48,
|
259 |
+
"learning_rate": 0.0,
|
260 |
+
"loss": 0.7513,
|
261 |
+
"step": 210
|
262 |
+
},
|
263 |
+
{
|
264 |
+
"epoch": 14.48,
|
265 |
+
"eval_accuracy": 0.6878048780487804,
|
266 |
+
"eval_loss": 0.7138883471488953,
|
267 |
+
"eval_runtime": 3.48,
|
268 |
+
"eval_samples_per_second": 58.908,
|
269 |
+
"eval_steps_per_second": 2.012,
|
270 |
+
"step": 210
|
271 |
+
},
|
272 |
+
{
|
273 |
+
"epoch": 14.48,
|
274 |
+
"step": 210,
|
275 |
+
"total_flos": 8.6685208493713e+17,
|
276 |
+
"train_loss": 0.9456698463076637,
|
277 |
+
"train_runtime": 1102.737,
|
278 |
+
"train_samples_per_second": 25.015,
|
279 |
+
"train_steps_per_second": 0.19
|
280 |
}
|
281 |
],
|
282 |
"logging_steps": 10,
|
283 |
+
"max_steps": 210,
|
284 |
"num_input_tokens_seen": 0,
|
285 |
+
"num_train_epochs": 15,
|
286 |
"save_steps": 500,
|
287 |
+
"total_flos": 8.6685208493713e+17,
|
288 |
"train_batch_size": 32,
|
289 |
"trial_name": null,
|
290 |
"trial_params": null
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4792
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:53217e0c5182d6e5b40d2b23665f7781a2c9b4eacf79515ea542107971a6ea22
|
3 |
size 4792
|