Training in progress, step 5200
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +101 -5
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2843230968
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:765ded4f85122106d3c7ca70ecc3286734cc83607e82caa135f0d00745a434b4
|
3 |
size 2843230968
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1421591285
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b58d358224de084112dd9ddc81a1e58fe3a83a78d808b5c279b4b2401d5f2b1f
|
3 |
size 1421591285
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14575
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:06744a746681eb14d92a4d87a2c4a3f33f8f040e9ff0c2cc0f5d538801039dce
|
3 |
size 14575
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 627
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1785d5e671bc65a68d5c3dfbd2d502885b77ad2217c8d158cb3c228d55f0b090
|
3 |
size 627
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
-
"best_model_checkpoint": "/home3/s5431786/nlp-final-project/results/roberta-large-e-snli-classification-nli_explanation-base-b16/checkpoint-
|
4 |
-
"epoch": 0.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -118,11 +118,107 @@
|
|
118 |
"eval_samples_per_second": 766.977,
|
119 |
"eval_steps_per_second": 48.004,
|
120 |
"step": 2800
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
121 |
}
|
122 |
],
|
123 |
"max_steps": 103008,
|
124 |
"num_train_epochs": 3,
|
125 |
-
"total_flos":
|
126 |
"trial_name": null,
|
127 |
"trial_params": null
|
128 |
}
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.9855364167633462,
|
3 |
+
"best_model_checkpoint": "/home3/s5431786/nlp-final-project/results/roberta-large-e-snli-classification-nli_explanation-base-b16/checkpoint-4800",
|
4 |
+
"epoch": 0.15144454799627213,
|
5 |
+
"global_step": 5200,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
118 |
"eval_samples_per_second": 766.977,
|
119 |
"eval_steps_per_second": 48.004,
|
120 |
"step": 2800
|
121 |
+
},
|
122 |
+
{
|
123 |
+
"epoch": 0.09,
|
124 |
+
"learning_rate": 6.212385944476801e-06,
|
125 |
+
"loss": 0.1229,
|
126 |
+
"step": 3200
|
127 |
+
},
|
128 |
+
{
|
129 |
+
"epoch": 0.09,
|
130 |
+
"eval_accuracy": 0.9820158504369031,
|
131 |
+
"eval_f1": 0.9819483437268689,
|
132 |
+
"eval_loss": 0.10309657454490662,
|
133 |
+
"eval_runtime": 13.2533,
|
134 |
+
"eval_samples_per_second": 742.608,
|
135 |
+
"eval_steps_per_second": 46.479,
|
136 |
+
"step": 3200
|
137 |
+
},
|
138 |
+
{
|
139 |
+
"epoch": 0.1,
|
140 |
+
"learning_rate": 6.988934187536401e-06,
|
141 |
+
"loss": 0.1291,
|
142 |
+
"step": 3600
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 0.1,
|
146 |
+
"eval_accuracy": 0.9842511684616948,
|
147 |
+
"eval_f1": 0.9842093647131546,
|
148 |
+
"eval_loss": 0.08313809335231781,
|
149 |
+
"eval_runtime": 13.0051,
|
150 |
+
"eval_samples_per_second": 756.778,
|
151 |
+
"eval_steps_per_second": 47.366,
|
152 |
+
"step": 3600
|
153 |
+
},
|
154 |
+
{
|
155 |
+
"epoch": 0.12,
|
156 |
+
"learning_rate": 7.765482430596002e-06,
|
157 |
+
"loss": 0.1049,
|
158 |
+
"step": 4000
|
159 |
+
},
|
160 |
+
{
|
161 |
+
"epoch": 0.12,
|
162 |
+
"eval_accuracy": 0.9824222718959561,
|
163 |
+
"eval_f1": 0.9823847600112674,
|
164 |
+
"eval_loss": 0.0907953679561615,
|
165 |
+
"eval_runtime": 12.984,
|
166 |
+
"eval_samples_per_second": 758.01,
|
167 |
+
"eval_steps_per_second": 47.443,
|
168 |
+
"step": 4000
|
169 |
+
},
|
170 |
+
{
|
171 |
+
"epoch": 0.13,
|
172 |
+
"learning_rate": 8.542030673655602e-06,
|
173 |
+
"loss": 0.1243,
|
174 |
+
"step": 4400
|
175 |
+
},
|
176 |
+
{
|
177 |
+
"epoch": 0.13,
|
178 |
+
"eval_accuracy": 0.9823206665311929,
|
179 |
+
"eval_f1": 0.9822362137371193,
|
180 |
+
"eval_loss": 0.0925893783569336,
|
181 |
+
"eval_runtime": 12.9285,
|
182 |
+
"eval_samples_per_second": 761.262,
|
183 |
+
"eval_steps_per_second": 47.647,
|
184 |
+
"step": 4400
|
185 |
+
},
|
186 |
+
{
|
187 |
+
"epoch": 0.14,
|
188 |
+
"learning_rate": 9.318578916715203e-06,
|
189 |
+
"loss": 0.1291,
|
190 |
+
"step": 4800
|
191 |
+
},
|
192 |
+
{
|
193 |
+
"epoch": 0.14,
|
194 |
+
"eval_accuracy": 0.9855720382036172,
|
195 |
+
"eval_f1": 0.9855364167633462,
|
196 |
+
"eval_loss": 0.08309133350849152,
|
197 |
+
"eval_runtime": 12.8482,
|
198 |
+
"eval_samples_per_second": 766.023,
|
199 |
+
"eval_steps_per_second": 47.945,
|
200 |
+
"step": 4800
|
201 |
+
},
|
202 |
+
{
|
203 |
+
"epoch": 0.15,
|
204 |
+
"learning_rate": 9.994992693419992e-06,
|
205 |
+
"loss": 0.1148,
|
206 |
+
"step": 5200
|
207 |
+
},
|
208 |
+
{
|
209 |
+
"epoch": 0.15,
|
210 |
+
"eval_accuracy": 0.983946352367405,
|
211 |
+
"eval_f1": 0.9839146445196306,
|
212 |
+
"eval_loss": 0.08415436744689941,
|
213 |
+
"eval_runtime": 12.8411,
|
214 |
+
"eval_samples_per_second": 766.443,
|
215 |
+
"eval_steps_per_second": 47.971,
|
216 |
+
"step": 5200
|
217 |
}
|
218 |
],
|
219 |
"max_steps": 103008,
|
220 |
"num_train_epochs": 3,
|
221 |
+
"total_flos": 1.020890573605968e+16,
|
222 |
"trial_name": null,
|
223 |
"trial_params": null
|
224 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1421591285
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b58d358224de084112dd9ddc81a1e58fe3a83a78d808b5c279b4b2401d5f2b1f
|
3 |
size 1421591285
|