ncbateman committed on
Commit
c42384a
·
verified ·
1 Parent(s): cbb612f

Training in progress, step 95, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:984a7527231fdcf924c550cc4a8e0ad6fc5902a246abec109bf86ee6e40f3e96
3
  size 97307544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f9b8ed5bd2b484501ed9a9750ab3c46606b53a398ada09af49075cb9868a1dbd
3
  size 97307544
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:69da363107c07a785017addea26230ce3041c8a5b658259b6c960c80f4cd4391
3
  size 49846260
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:873e123d716b4e3681b7bdcf8b6d4024d99e642a789148deeef040a213b7988f
3
  size 49846260
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8226228c393b137baf64ed3f82c6ed8807e456d3f8e179f09160bf26e5dcc037
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55974460331ed68231a1ad9dc00a5262b80738898a81bf122859c075aa9b16c2
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d72920266addd1520c8e9d88dcca5854048e355bd6edd8cc80e32e4e48b95586
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:574ab12ec5e33abca7f4c7bc83b682560fcb2b9d98fc4110c6edb18204a6e3fe
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.11646716273050793,
5
  "eval_steps": 386,
6
- "global_step": 90,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -645,6 +645,41 @@
645
  "learning_rate": 9.997280616774147e-05,
646
  "loss": 1.1672,
647
  "step": 90
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
648
  }
649
  ],
650
  "logging_steps": 1,
@@ -664,7 +699,7 @@
664
  "attributes": {}
665
  }
666
  },
667
- "total_flos": 1.0061434545242112e+17,
668
  "train_batch_size": 4,
669
  "trial_name": null,
670
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.12293756065998059,
5
  "eval_steps": 386,
6
+ "global_step": 95,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
645
  "learning_rate": 9.997280616774147e-05,
646
  "loss": 1.1672,
647
  "step": 90
648
+ },
649
+ {
650
+ "epoch": 0.11776124231640246,
651
+ "grad_norm": 1.0035395622253418,
652
+ "learning_rate": 9.997142961110634e-05,
653
+ "loss": 0.9294,
654
+ "step": 91
655
+ },
656
+ {
657
+ "epoch": 0.11905532190229699,
658
+ "grad_norm": 1.1194915771484375,
659
+ "learning_rate": 9.997001907852635e-05,
660
+ "loss": 1.0857,
661
+ "step": 92
662
+ },
663
+ {
664
+ "epoch": 0.12034940148819152,
665
+ "grad_norm": 1.5234825611114502,
666
+ "learning_rate": 9.996857457096047e-05,
667
+ "loss": 1.027,
668
+ "step": 93
669
+ },
670
+ {
671
+ "epoch": 0.12164348107408605,
672
+ "grad_norm": 0.949878454208374,
673
+ "learning_rate": 9.996709608939088e-05,
674
+ "loss": 0.8173,
675
+ "step": 94
676
+ },
677
+ {
678
+ "epoch": 0.12293756065998059,
679
+ "grad_norm": 0.8736472129821777,
680
+ "learning_rate": 9.996558363482277e-05,
681
+ "loss": 0.855,
682
+ "step": 95
683
  }
684
  ],
685
  "logging_steps": 1,
 
699
  "attributes": {}
700
  }
701
  },
702
+ "total_flos": 1.0620403131088896e+17,
703
  "train_batch_size": 4,
704
  "trial_name": null,
705
  "trial_params": null