Training in progress, step 7600
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +101 -5
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2843230968
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8c468bcf7bddaad4cfe5f21a8be06f85b469953475ff323261f94d5ccdb8a1e8
|
3 |
size 2843230968
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1421591285
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3b80b7268d18e149e228859bf683e038919f218c78a06725456afb2efbd62075
|
3 |
size 1421591285
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14575
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:79a76a4ff38a22dd253bb22a59e6c5f070adba4b19d7ceb156911d72dbd948a2
|
3 |
size 14575
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 627
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4eb81e4964c296ed1f062cf4a686cdaf7267fba5c8b0915f34103b8221211c4e
|
3 |
size 627
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
-
"best_model_checkpoint": "/home3/s5431786/nlp-final-project/results/roberta-large-e-snli-classification-nli_explanation-base-b16/checkpoint-
|
4 |
-
"epoch": 0.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -214,11 +214,107 @@
|
|
214 |
"eval_samples_per_second": 766.443,
|
215 |
"eval_steps_per_second": 47.971,
|
216 |
"step": 5200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
217 |
}
|
218 |
],
|
219 |
"max_steps": 103008,
|
220 |
"num_train_epochs": 3,
|
221 |
-
"total_flos": 1.
|
222 |
"trial_name": null,
|
223 |
"trial_params": null
|
224 |
}
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.9861495731704059,
|
3 |
+
"best_model_checkpoint": "/home3/s5431786/nlp-final-project/results/roberta-large-e-snli-classification-nli_explanation-base-b16/checkpoint-7200",
|
4 |
+
"epoch": 0.2213420316868593,
|
5 |
+
"global_step": 7600,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
214 |
"eval_samples_per_second": 766.443,
|
215 |
"eval_steps_per_second": 47.971,
|
216 |
"step": 5200
|
217 |
+
},
|
218 |
+
{
|
219 |
+
"epoch": 0.16,
|
220 |
+
"learning_rate": 9.954116721338281e-06,
|
221 |
+
"loss": 0.1256,
|
222 |
+
"step": 5600
|
223 |
+
},
|
224 |
+
{
|
225 |
+
"epoch": 0.16,
|
226 |
+
"eval_accuracy": 0.9836415362731152,
|
227 |
+
"eval_f1": 0.9836219614739408,
|
228 |
+
"eval_loss": 0.084382563829422,
|
229 |
+
"eval_runtime": 13.2723,
|
230 |
+
"eval_samples_per_second": 741.546,
|
231 |
+
"eval_steps_per_second": 46.413,
|
232 |
+
"step": 5600
|
233 |
+
},
|
234 |
+
{
|
235 |
+
"epoch": 0.17,
|
236 |
+
"learning_rate": 9.91324074925657e-06,
|
237 |
+
"loss": 0.1207,
|
238 |
+
"step": 6000
|
239 |
+
},
|
240 |
+
{
|
241 |
+
"epoch": 0.17,
|
242 |
+
"eval_accuracy": 0.9829302987197724,
|
243 |
+
"eval_f1": 0.9829157443588743,
|
244 |
+
"eval_loss": 0.0957166850566864,
|
245 |
+
"eval_runtime": 12.9479,
|
246 |
+
"eval_samples_per_second": 760.125,
|
247 |
+
"eval_steps_per_second": 47.575,
|
248 |
+
"step": 6000
|
249 |
+
},
|
250 |
+
{
|
251 |
+
"epoch": 0.19,
|
252 |
+
"learning_rate": 9.872364777174857e-06,
|
253 |
+
"loss": 0.1162,
|
254 |
+
"step": 6400
|
255 |
+
},
|
256 |
+
{
|
257 |
+
"epoch": 0.19,
|
258 |
+
"eval_accuracy": 0.9859784596626702,
|
259 |
+
"eval_f1": 0.9859267481206228,
|
260 |
+
"eval_loss": 0.0752706453204155,
|
261 |
+
"eval_runtime": 13.005,
|
262 |
+
"eval_samples_per_second": 756.786,
|
263 |
+
"eval_steps_per_second": 47.366,
|
264 |
+
"step": 6400
|
265 |
+
},
|
266 |
+
{
|
267 |
+
"epoch": 0.2,
|
268 |
+
"learning_rate": 9.831488805093147e-06,
|
269 |
+
"loss": 0.095,
|
270 |
+
"step": 6800
|
271 |
+
},
|
272 |
+
{
|
273 |
+
"epoch": 0.2,
|
274 |
+
"eval_accuracy": 0.9844543791912213,
|
275 |
+
"eval_f1": 0.9844184876885574,
|
276 |
+
"eval_loss": 0.09373413771390915,
|
277 |
+
"eval_runtime": 13.0171,
|
278 |
+
"eval_samples_per_second": 756.082,
|
279 |
+
"eval_steps_per_second": 47.322,
|
280 |
+
"step": 6800
|
281 |
+
},
|
282 |
+
{
|
283 |
+
"epoch": 0.21,
|
284 |
+
"learning_rate": 9.790612833011435e-06,
|
285 |
+
"loss": 0.1069,
|
286 |
+
"step": 7200
|
287 |
+
},
|
288 |
+
{
|
289 |
+
"epoch": 0.21,
|
290 |
+
"eval_accuracy": 0.9861816703921967,
|
291 |
+
"eval_f1": 0.9861495731704059,
|
292 |
+
"eval_loss": 0.07330357283353806,
|
293 |
+
"eval_runtime": 12.8964,
|
294 |
+
"eval_samples_per_second": 763.158,
|
295 |
+
"eval_steps_per_second": 47.765,
|
296 |
+
"step": 7200
|
297 |
+
},
|
298 |
+
{
|
299 |
+
"epoch": 0.22,
|
300 |
+
"learning_rate": 9.749736860929725e-06,
|
301 |
+
"loss": 0.1054,
|
302 |
+
"step": 7600
|
303 |
+
},
|
304 |
+
{
|
305 |
+
"epoch": 0.22,
|
306 |
+
"eval_accuracy": 0.9853688274740906,
|
307 |
+
"eval_f1": 0.9853406697769191,
|
308 |
+
"eval_loss": 0.07271973788738251,
|
309 |
+
"eval_runtime": 12.8137,
|
310 |
+
"eval_samples_per_second": 768.085,
|
311 |
+
"eval_steps_per_second": 48.074,
|
312 |
+
"step": 7600
|
313 |
}
|
314 |
],
|
315 |
"max_steps": 103008,
|
316 |
"num_train_epochs": 3,
|
317 |
+
"total_flos": 1.4913660628723872e+16,
|
318 |
"trial_name": null,
|
319 |
"trial_params": null
|
320 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1421591285
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3b80b7268d18e149e228859bf683e038919f218c78a06725456afb2efbd62075
|
3 |
size 1421591285
|