ncbateman commited on
Commit
311f4d0
1 Parent(s): bc83849

Training in progress, step 120, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0bc94ba8450a510cff44d172874e2147b62017a7b09db70a03ff21bb97b364a7
3
  size 97307544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6635fb1a5a4d1e52befb48d9c1dd6d22cef487e7060ad2ae8c77bf218b621c64
3
  size 97307544
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fb753185eaccdb5442e4ae0d8f1881afb3f2df47cdcd9c592fca0c8a776ea638
3
  size 49846260
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69f88a4454de958ff0c771396808a3201dd5527422259556c25c03c7e282c3fb
3
  size 49846260
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:94bc56f9e3ed1cb490edfa6ed817ab5a627f1fa72c4c42df8f65738ad0756b7b
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f268972a397e2af6ad977488ad986f34c2f5a772c90c6a6cc2e109003c5ffb16
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:edf0fb1dfa2c7ca025c1bcf4459d1d508b2f798ad95b431872a2ea669647ad00
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f61ff627b3e4926c6ec03a811654366b4e4d6e1e7047b7acc119ae7fc1df0c10
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.14881915237787124,
5
  "eval_steps": 386,
6
- "global_step": 115,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -820,6 +820,41 @@
820
  "learning_rate": 9.992820196634273e-05,
821
  "loss": 0.9785,
822
  "step": 115
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
823
  }
824
  ],
825
  "logging_steps": 1,
@@ -839,7 +874,7 @@
839
  "attributes": {}
840
  }
841
  },
842
- "total_flos": 1.2856277474476032e+17,
843
  "train_batch_size": 4,
844
  "trial_name": null,
845
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.1552895503073439,
5
  "eval_steps": 386,
6
+ "global_step": 120,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
820
  "learning_rate": 9.992820196634273e-05,
821
  "loss": 0.9785,
822
  "step": 115
823
+ },
824
+ {
825
+ "epoch": 0.15011323196376578,
826
+ "grad_norm": 0.8342046737670898,
827
+ "learning_rate": 9.992597635191509e-05,
828
+ "loss": 0.9291,
829
+ "step": 116
830
+ },
831
+ {
832
+ "epoch": 0.1514073115496603,
833
+ "grad_norm": 0.8460632562637329,
834
+ "learning_rate": 9.992371679244658e-05,
835
+ "loss": 0.8797,
836
+ "step": 117
837
+ },
838
+ {
839
+ "epoch": 0.15270139113555484,
840
+ "grad_norm": 0.933060348033905,
841
+ "learning_rate": 9.992142328947345e-05,
842
+ "loss": 0.9657,
843
+ "step": 118
844
+ },
845
+ {
846
+ "epoch": 0.15399547072144937,
847
+ "grad_norm": 0.8822593688964844,
848
+ "learning_rate": 9.991909584455511e-05,
849
+ "loss": 0.8872,
850
+ "step": 119
851
+ },
852
+ {
853
+ "epoch": 0.1552895503073439,
854
+ "grad_norm": 0.9599350094795227,
855
+ "learning_rate": 9.991673445927398e-05,
856
+ "loss": 0.9064,
857
+ "step": 120
858
  }
859
  ],
860
  "logging_steps": 1,
 
874
  "attributes": {}
875
  }
876
  },
877
+ "total_flos": 1.3415246060322816e+17,
878
  "train_batch_size": 4,
879
  "trial_name": null,
880
  "trial_params": null