k4black commited on
Commit
d7f3999
1 Parent(s): b66148a

Training in progress, step 7600

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:765ded4f85122106d3c7ca70ecc3286734cc83607e82caa135f0d00745a434b4
3
  size 2843230968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c468bcf7bddaad4cfe5f21a8be06f85b469953475ff323261f94d5ccdb8a1e8
3
  size 2843230968
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b58d358224de084112dd9ddc81a1e58fe3a83a78d808b5c279b4b2401d5f2b1f
3
  size 1421591285
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b80b7268d18e149e228859bf683e038919f218c78a06725456afb2efbd62075
3
  size 1421591285
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:06744a746681eb14d92a4d87a2c4a3f33f8f040e9ff0c2cc0f5d538801039dce
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79a76a4ff38a22dd253bb22a59e6c5f070adba4b19d7ceb156911d72dbd948a2
3
  size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1785d5e671bc65a68d5c3dfbd2d502885b77ad2217c8d158cb3c228d55f0b090
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4eb81e4964c296ed1f062cf4a686cdaf7267fba5c8b0915f34103b8221211c4e
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": 0.9855364167633462,
3
- "best_model_checkpoint": "/home3/s5431786/nlp-final-project/results/roberta-large-e-snli-classification-nli_explanation-base-b16/checkpoint-4800",
4
- "epoch": 0.15144454799627213,
5
- "global_step": 5200,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -214,11 +214,107 @@
214
  "eval_samples_per_second": 766.443,
215
  "eval_steps_per_second": 47.971,
216
  "step": 5200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
217
  }
218
  ],
219
  "max_steps": 103008,
220
  "num_train_epochs": 3,
221
- "total_flos": 1.020890573605968e+16,
222
  "trial_name": null,
223
  "trial_params": null
224
  }
 
1
  {
2
+ "best_metric": 0.9861495731704059,
3
+ "best_model_checkpoint": "/home3/s5431786/nlp-final-project/results/roberta-large-e-snli-classification-nli_explanation-base-b16/checkpoint-7200",
4
+ "epoch": 0.2213420316868593,
5
+ "global_step": 7600,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
214
  "eval_samples_per_second": 766.443,
215
  "eval_steps_per_second": 47.971,
216
  "step": 5200
217
+ },
218
+ {
219
+ "epoch": 0.16,
220
+ "learning_rate": 9.954116721338281e-06,
221
+ "loss": 0.1256,
222
+ "step": 5600
223
+ },
224
+ {
225
+ "epoch": 0.16,
226
+ "eval_accuracy": 0.9836415362731152,
227
+ "eval_f1": 0.9836219614739408,
228
+ "eval_loss": 0.084382563829422,
229
+ "eval_runtime": 13.2723,
230
+ "eval_samples_per_second": 741.546,
231
+ "eval_steps_per_second": 46.413,
232
+ "step": 5600
233
+ },
234
+ {
235
+ "epoch": 0.17,
236
+ "learning_rate": 9.91324074925657e-06,
237
+ "loss": 0.1207,
238
+ "step": 6000
239
+ },
240
+ {
241
+ "epoch": 0.17,
242
+ "eval_accuracy": 0.9829302987197724,
243
+ "eval_f1": 0.9829157443588743,
244
+ "eval_loss": 0.0957166850566864,
245
+ "eval_runtime": 12.9479,
246
+ "eval_samples_per_second": 760.125,
247
+ "eval_steps_per_second": 47.575,
248
+ "step": 6000
249
+ },
250
+ {
251
+ "epoch": 0.19,
252
+ "learning_rate": 9.872364777174857e-06,
253
+ "loss": 0.1162,
254
+ "step": 6400
255
+ },
256
+ {
257
+ "epoch": 0.19,
258
+ "eval_accuracy": 0.9859784596626702,
259
+ "eval_f1": 0.9859267481206228,
260
+ "eval_loss": 0.0752706453204155,
261
+ "eval_runtime": 13.005,
262
+ "eval_samples_per_second": 756.786,
263
+ "eval_steps_per_second": 47.366,
264
+ "step": 6400
265
+ },
266
+ {
267
+ "epoch": 0.2,
268
+ "learning_rate": 9.831488805093147e-06,
269
+ "loss": 0.095,
270
+ "step": 6800
271
+ },
272
+ {
273
+ "epoch": 0.2,
274
+ "eval_accuracy": 0.9844543791912213,
275
+ "eval_f1": 0.9844184876885574,
276
+ "eval_loss": 0.09373413771390915,
277
+ "eval_runtime": 13.0171,
278
+ "eval_samples_per_second": 756.082,
279
+ "eval_steps_per_second": 47.322,
280
+ "step": 6800
281
+ },
282
+ {
283
+ "epoch": 0.21,
284
+ "learning_rate": 9.790612833011435e-06,
285
+ "loss": 0.1069,
286
+ "step": 7200
287
+ },
288
+ {
289
+ "epoch": 0.21,
290
+ "eval_accuracy": 0.9861816703921967,
291
+ "eval_f1": 0.9861495731704059,
292
+ "eval_loss": 0.07330357283353806,
293
+ "eval_runtime": 12.8964,
294
+ "eval_samples_per_second": 763.158,
295
+ "eval_steps_per_second": 47.765,
296
+ "step": 7200
297
+ },
298
+ {
299
+ "epoch": 0.22,
300
+ "learning_rate": 9.749736860929725e-06,
301
+ "loss": 0.1054,
302
+ "step": 7600
303
+ },
304
+ {
305
+ "epoch": 0.22,
306
+ "eval_accuracy": 0.9853688274740906,
307
+ "eval_f1": 0.9853406697769191,
308
+ "eval_loss": 0.07271973788738251,
309
+ "eval_runtime": 12.8137,
310
+ "eval_samples_per_second": 768.085,
311
+ "eval_steps_per_second": 48.074,
312
+ "step": 7600
313
  }
314
  ],
315
  "max_steps": 103008,
316
  "num_train_epochs": 3,
317
+ "total_flos": 1.4913660628723872e+16,
318
  "trial_name": null,
319
  "trial_params": null
320
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b58d358224de084112dd9ddc81a1e58fe3a83a78d808b5c279b4b2401d5f2b1f
3
  size 1421591285
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b80b7268d18e149e228859bf683e038919f218c78a06725456afb2efbd62075
3
  size 1421591285