bobox committed on
Commit
8f36d77
1 Parent(s): 511446a

Training in progress, epoch 2, checkpoint

Browse files
last-checkpoint/README.md CHANGED
@@ -775,6 +775,16 @@ You can finetune this model on your own dataset.
775
  | 0.8 | 3768 | 0.9259 | 0.3853 | 0.1646 | 0.2819 |
776
  | 0.9 | 4239 | 0.8709 | 0.3749 | 0.1157 | 0.2912 |
777
  | 1.0 | 4710 | 0.8686 | 0.3636 | 0.0961 | 0.3109 |
 
 
 
 
 
 
 
 
 
 
778
 
779
 
780
  ### Framework Versions
 
775
  | 0.8 | 3768 | 0.9259 | 0.3853 | 0.1646 | 0.2819 |
776
  | 0.9 | 4239 | 0.8709 | 0.3749 | 0.1157 | 0.2912 |
777
  | 1.0 | 4710 | 0.8686 | 0.3636 | 0.0961 | 0.3109 |
778
+ | 1.1 | 5181 | 0.726 | 0.3744 | 0.0453 | 0.3424 |
779
+ | 1.2 | 5652 | 0.8151 | 0.3502 | 0.1835 | 0.2602 |
780
+ | 1.3 | 6123 | 0.7127 | 0.3362 | 0.1089 | 0.2460 |
781
+ | 1.4 | 6594 | 0.8408 | 0.3184 | 0.0701 | 0.2784 |
782
+ | 1.5 | 7065 | 0.7845 | 0.3191 | 0.0318 | 0.2822 |
783
+ | 1.6 | 7536 | 0.5766 | 0.3056 | 0.0566 | 0.2774 |
784
+ | 1.7 | 8007 | 0.7304 | 0.2991 | 0.0542 | 0.2736 |
785
+ | 1.8 | 8478 | 0.6639 | 0.2949 | 0.0515 | 0.2694 |
786
+ | 1.9 | 8949 | 0.6153 | 0.2938 | 0.0589 | 0.2718 |
787
+ | 2.0 | 9420 | 0.6665 | 0.2937 | 0.0569 | 0.2724 |
788
 
789
 
790
  ### Framework Versions
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:24a991a022791dbad68bcccfc1a822f0a523b0d0033cb55ebbbb7278fa749813
3
  size 1130520122
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89cf03dc9085ffef97e239bba9d281185e9db1fd4e0da8fb9ed67d08da8d63ed
3
  size 1130520122
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e2ad0a254872f555e16b4825539b2ed51a358095478594397a27f095a144279b
3
  size 565251810
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cad5f2f78e52ff86b9024a77895dae5ac7daf09ebac038d6b7e6d7a109b7fab1
3
  size 565251810
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c341d87b21e7e32706eb595f0f035e1e0247fe56c8c298f72c6e286fd8ab0e20
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc3e6bd6964335e97fd00e4ed99553574e6a0c29df42723a319985da7eb09a2c
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:072f295cd9400d44a23f01cc82ad8c9b8b89be4ef3aba1d3b8e750e9883aec90
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c0b1665ad9a994274278193da377d00cc0a72d4cbeda48768b256548dcea6f9
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.0,
5
  "eval_steps": 471,
6
- "global_step": 4710,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -317,6 +317,316 @@
317
  "eval_qnli-contrastive_samples_per_second": 347.537,
318
  "eval_qnli-contrastive_steps_per_second": 21.757,
319
  "step": 4710
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
320
  }
321
  ],
322
  "logging_steps": 471,
@@ -331,7 +641,7 @@
331
  "should_evaluate": false,
332
  "should_log": false,
333
  "should_save": true,
334
- "should_training_stop": false
335
  },
336
  "attributes": {}
337
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.0,
5
  "eval_steps": 471,
6
+ "global_step": 9420,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
317
  "eval_qnli-contrastive_samples_per_second": 347.537,
318
  "eval_qnli-contrastive_steps_per_second": 21.757,
319
  "step": 4710
320
+ },
321
+ {
322
+ "epoch": 1.1,
323
+ "grad_norm": 17.123151779174805,
324
+ "learning_rate": 1.5161494182199708e-05,
325
+ "loss": 0.726,
326
+ "step": 5181
327
+ },
328
+ {
329
+ "epoch": 1.1,
330
+ "eval_nli-pairs_loss": 0.37437891960144043,
331
+ "eval_nli-pairs_runtime": 14.6826,
332
+ "eval_nli-pairs_samples_per_second": 463.677,
333
+ "eval_nli-pairs_steps_per_second": 29.014,
334
+ "step": 5181
335
+ },
336
+ {
337
+ "epoch": 1.1,
338
+ "eval_scitail-pairs-pos_loss": 0.34239521622657776,
339
+ "eval_scitail-pairs-pos_runtime": 3.4343,
340
+ "eval_scitail-pairs-pos_samples_per_second": 379.703,
341
+ "eval_scitail-pairs-pos_steps_per_second": 23.877,
342
+ "step": 5181
343
+ },
344
+ {
345
+ "epoch": 1.1,
346
+ "eval_qnli-contrastive_loss": 0.04533466696739197,
347
+ "eval_qnli-contrastive_runtime": 15.5919,
348
+ "eval_qnli-contrastive_samples_per_second": 350.374,
349
+ "eval_qnli-contrastive_steps_per_second": 21.934,
350
+ "step": 5181
351
+ },
352
+ {
353
+ "epoch": 1.2,
354
+ "grad_norm": 4.296922206878662,
355
+ "learning_rate": 1.3030457061862642e-05,
356
+ "loss": 0.8151,
357
+ "step": 5652
358
+ },
359
+ {
360
+ "epoch": 1.2,
361
+ "eval_nli-pairs_loss": 0.3501867353916168,
362
+ "eval_nli-pairs_runtime": 14.648,
363
+ "eval_nli-pairs_samples_per_second": 464.772,
364
+ "eval_nli-pairs_steps_per_second": 29.082,
365
+ "step": 5652
366
+ },
367
+ {
368
+ "epoch": 1.2,
369
+ "eval_scitail-pairs-pos_loss": 0.26023754477500916,
370
+ "eval_scitail-pairs-pos_runtime": 3.3385,
371
+ "eval_scitail-pairs-pos_samples_per_second": 390.589,
372
+ "eval_scitail-pairs-pos_steps_per_second": 24.562,
373
+ "step": 5652
374
+ },
375
+ {
376
+ "epoch": 1.2,
377
+ "eval_qnli-contrastive_loss": 0.18350932002067566,
378
+ "eval_qnli-contrastive_runtime": 15.5173,
379
+ "eval_qnli-contrastive_samples_per_second": 352.059,
380
+ "eval_qnli-contrastive_steps_per_second": 22.04,
381
+ "step": 5652
382
+ },
383
+ {
384
+ "epoch": 1.3,
385
+ "grad_norm": 5.370415210723877,
386
+ "learning_rate": 1.0733590017323587e-05,
387
+ "loss": 0.7127,
388
+ "step": 6123
389
+ },
390
+ {
391
+ "epoch": 1.3,
392
+ "eval_nli-pairs_loss": 0.33619123697280884,
393
+ "eval_nli-pairs_runtime": 14.5016,
394
+ "eval_nli-pairs_samples_per_second": 469.464,
395
+ "eval_nli-pairs_steps_per_second": 29.376,
396
+ "step": 6123
397
+ },
398
+ {
399
+ "epoch": 1.3,
400
+ "eval_scitail-pairs-pos_loss": 0.24599790573120117,
401
+ "eval_scitail-pairs-pos_runtime": 3.3041,
402
+ "eval_scitail-pairs-pos_samples_per_second": 394.666,
403
+ "eval_scitail-pairs-pos_steps_per_second": 24.818,
404
+ "step": 6123
405
+ },
406
+ {
407
+ "epoch": 1.3,
408
+ "eval_qnli-contrastive_loss": 0.10889358073472977,
409
+ "eval_qnli-contrastive_runtime": 15.451,
410
+ "eval_qnli-contrastive_samples_per_second": 353.569,
411
+ "eval_qnli-contrastive_steps_per_second": 22.134,
412
+ "step": 6123
413
+ },
414
+ {
415
+ "epoch": 1.4,
416
+ "grad_norm": 105.34712219238281,
417
+ "learning_rate": 8.401493879376199e-06,
418
+ "loss": 0.8408,
419
+ "step": 6594
420
+ },
421
+ {
422
+ "epoch": 1.4,
423
+ "eval_nli-pairs_loss": 0.3184218406677246,
424
+ "eval_nli-pairs_runtime": 14.4863,
425
+ "eval_nli-pairs_samples_per_second": 469.961,
426
+ "eval_nli-pairs_steps_per_second": 29.407,
427
+ "step": 6594
428
+ },
429
+ {
430
+ "epoch": 1.4,
431
+ "eval_scitail-pairs-pos_loss": 0.27837762236595154,
432
+ "eval_scitail-pairs-pos_runtime": 3.3858,
433
+ "eval_scitail-pairs-pos_samples_per_second": 385.134,
434
+ "eval_scitail-pairs-pos_steps_per_second": 24.219,
435
+ "step": 6594
436
+ },
437
+ {
438
+ "epoch": 1.4,
439
+ "eval_qnli-contrastive_loss": 0.07013922929763794,
440
+ "eval_qnli-contrastive_runtime": 15.4468,
441
+ "eval_qnli-contrastive_samples_per_second": 353.666,
442
+ "eval_qnli-contrastive_steps_per_second": 22.141,
443
+ "step": 6594
444
+ },
445
+ {
446
+ "epoch": 1.5,
447
+ "grad_norm": 1.1197956800460815,
448
+ "learning_rate": 6.15190528470631e-06,
449
+ "loss": 0.7845,
450
+ "step": 7065
451
+ },
452
+ {
453
+ "epoch": 1.5,
454
+ "eval_nli-pairs_loss": 0.3191192150115967,
455
+ "eval_nli-pairs_runtime": 14.505,
456
+ "eval_nli-pairs_samples_per_second": 469.355,
457
+ "eval_nli-pairs_steps_per_second": 29.369,
458
+ "step": 7065
459
+ },
460
+ {
461
+ "epoch": 1.5,
462
+ "eval_scitail-pairs-pos_loss": 0.2821648120880127,
463
+ "eval_scitail-pairs-pos_runtime": 3.3778,
464
+ "eval_scitail-pairs-pos_samples_per_second": 386.049,
465
+ "eval_scitail-pairs-pos_steps_per_second": 24.276,
466
+ "step": 7065
467
+ },
468
+ {
469
+ "epoch": 1.5,
470
+ "eval_qnli-contrastive_loss": 0.03179321065545082,
471
+ "eval_qnli-contrastive_runtime": 15.41,
472
+ "eval_qnli-contrastive_samples_per_second": 354.509,
473
+ "eval_qnli-contrastive_steps_per_second": 22.193,
474
+ "step": 7065
475
+ },
476
+ {
477
+ "epoch": 1.6,
478
+ "grad_norm": 3.4374799728393555,
479
+ "learning_rate": 4.1128886407133994e-06,
480
+ "loss": 0.5766,
481
+ "step": 7536
482
+ },
483
+ {
484
+ "epoch": 1.6,
485
+ "eval_nli-pairs_loss": 0.30556315183639526,
486
+ "eval_nli-pairs_runtime": 14.4825,
487
+ "eval_nli-pairs_samples_per_second": 470.083,
488
+ "eval_nli-pairs_steps_per_second": 29.415,
489
+ "step": 7536
490
+ },
491
+ {
492
+ "epoch": 1.6,
493
+ "eval_scitail-pairs-pos_loss": 0.277355819940567,
494
+ "eval_scitail-pairs-pos_runtime": 3.3048,
495
+ "eval_scitail-pairs-pos_samples_per_second": 394.581,
496
+ "eval_scitail-pairs-pos_steps_per_second": 24.813,
497
+ "step": 7536
498
+ },
499
+ {
500
+ "epoch": 1.6,
501
+ "eval_qnli-contrastive_loss": 0.056649066507816315,
502
+ "eval_qnli-contrastive_runtime": 15.5222,
503
+ "eval_qnli-contrastive_samples_per_second": 351.949,
504
+ "eval_qnli-contrastive_steps_per_second": 22.033,
505
+ "step": 7536
506
+ },
507
+ {
508
+ "epoch": 1.7,
509
+ "grad_norm": 5.434581279754639,
510
+ "learning_rate": 2.3960211678026622e-06,
511
+ "loss": 0.7304,
512
+ "step": 8007
513
+ },
514
+ {
515
+ "epoch": 1.7,
516
+ "eval_nli-pairs_loss": 0.29907679557800293,
517
+ "eval_nli-pairs_runtime": 14.5118,
518
+ "eval_nli-pairs_samples_per_second": 469.136,
519
+ "eval_nli-pairs_steps_per_second": 29.355,
520
+ "step": 8007
521
+ },
522
+ {
523
+ "epoch": 1.7,
524
+ "eval_scitail-pairs-pos_loss": 0.2736453711986542,
525
+ "eval_scitail-pairs-pos_runtime": 3.2966,
526
+ "eval_scitail-pairs-pos_samples_per_second": 395.561,
527
+ "eval_scitail-pairs-pos_steps_per_second": 24.874,
528
+ "step": 8007
529
+ },
530
+ {
531
+ "epoch": 1.7,
532
+ "eval_qnli-contrastive_loss": 0.0541638545691967,
533
+ "eval_qnli-contrastive_runtime": 15.4865,
534
+ "eval_qnli-contrastive_samples_per_second": 352.758,
535
+ "eval_qnli-contrastive_steps_per_second": 22.084,
536
+ "step": 8007
537
+ },
538
+ {
539
+ "epoch": 1.8,
540
+ "grad_norm": 17.071992874145508,
541
+ "learning_rate": 1.0952517314705368e-06,
542
+ "loss": 0.6639,
543
+ "step": 8478
544
+ },
545
+ {
546
+ "epoch": 1.8,
547
+ "eval_nli-pairs_loss": 0.29488247632980347,
548
+ "eval_nli-pairs_runtime": 14.521,
549
+ "eval_nli-pairs_samples_per_second": 468.837,
550
+ "eval_nli-pairs_steps_per_second": 29.337,
551
+ "step": 8478
552
+ },
553
+ {
554
+ "epoch": 1.8,
555
+ "eval_scitail-pairs-pos_loss": 0.26940035820007324,
556
+ "eval_scitail-pairs-pos_runtime": 3.3511,
557
+ "eval_scitail-pairs-pos_samples_per_second": 389.126,
558
+ "eval_scitail-pairs-pos_steps_per_second": 24.47,
559
+ "step": 8478
560
+ },
561
+ {
562
+ "epoch": 1.8,
563
+ "eval_qnli-contrastive_loss": 0.05149933323264122,
564
+ "eval_qnli-contrastive_runtime": 15.4893,
565
+ "eval_qnli-contrastive_samples_per_second": 352.694,
566
+ "eval_qnli-contrastive_steps_per_second": 22.08,
567
+ "step": 8478
568
+ },
569
+ {
570
+ "epoch": 1.9,
571
+ "grad_norm": 4.333444118499756,
572
+ "learning_rate": 2.817598576525049e-07,
573
+ "loss": 0.6153,
574
+ "step": 8949
575
+ },
576
+ {
577
+ "epoch": 1.9,
578
+ "eval_nli-pairs_loss": 0.29380860924720764,
579
+ "eval_nli-pairs_runtime": 14.6947,
580
+ "eval_nli-pairs_samples_per_second": 463.296,
581
+ "eval_nli-pairs_steps_per_second": 28.99,
582
+ "step": 8949
583
+ },
584
+ {
585
+ "epoch": 1.9,
586
+ "eval_scitail-pairs-pos_loss": 0.27175840735435486,
587
+ "eval_scitail-pairs-pos_runtime": 3.3734,
588
+ "eval_scitail-pairs-pos_samples_per_second": 386.557,
589
+ "eval_scitail-pairs-pos_steps_per_second": 24.308,
590
+ "step": 8949
591
+ },
592
+ {
593
+ "epoch": 1.9,
594
+ "eval_qnli-contrastive_loss": 0.05886112153530121,
595
+ "eval_qnli-contrastive_runtime": 15.7063,
596
+ "eval_qnli-contrastive_samples_per_second": 347.823,
597
+ "eval_qnli-contrastive_steps_per_second": 21.775,
598
+ "step": 8949
599
+ },
600
+ {
601
+ "epoch": 2.0,
602
+ "grad_norm": 17.020780563354492,
603
+ "learning_rate": 7.929627552805131e-11,
604
+ "loss": 0.6665,
605
+ "step": 9420
606
+ },
607
+ {
608
+ "epoch": 2.0,
609
+ "eval_nli-pairs_loss": 0.2937406301498413,
610
+ "eval_nli-pairs_runtime": 14.6591,
611
+ "eval_nli-pairs_samples_per_second": 464.421,
612
+ "eval_nli-pairs_steps_per_second": 29.06,
613
+ "step": 9420
614
+ },
615
+ {
616
+ "epoch": 2.0,
617
+ "eval_scitail-pairs-pos_loss": 0.27235355973243713,
618
+ "eval_scitail-pairs-pos_runtime": 3.3473,
619
+ "eval_scitail-pairs-pos_samples_per_second": 389.563,
620
+ "eval_scitail-pairs-pos_steps_per_second": 24.497,
621
+ "step": 9420
622
+ },
623
+ {
624
+ "epoch": 2.0,
625
+ "eval_qnli-contrastive_loss": 0.05692654103040695,
626
+ "eval_qnli-contrastive_runtime": 15.5164,
627
+ "eval_qnli-contrastive_samples_per_second": 352.078,
628
+ "eval_qnli-contrastive_steps_per_second": 22.041,
629
+ "step": 9420
630
  }
631
  ],
632
  "logging_steps": 471,
 
641
  "should_evaluate": false,
642
  "should_log": false,
643
  "should_save": true,
644
+ "should_training_stop": true
645
  },
646
  "attributes": {}
647
  }