ncbateman committed
Commit 9033988
1 Parent(s): 4726c06

Training in progress, step 245, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:6aa1d020dcc8571e736d654589ba8f174c1261833c4fd9844032b87ef8b2d71b
+ oid sha256:c1589e42cbae5378987344eae93a5cbeb6aaea407f7da88b4a6f4a9d93d04e89
  size 97307544
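
The adapter_model.safetensors pointer swap above is the substance of this checkpoint: judging by the file name it is presumably a PEFT-style adapter whose ~97 MB of weights live in Git LFS rather than in the diff itself. A minimal inspection sketch, assuming the blob has been pulled locally and that safetensors and torch are installed; everything except the path is illustrative:

```python
# Hypothetical inspection script (not part of this repo): it assumes the LFS
# blob has been downloaded to the path touched by this commit.
from safetensors.torch import load_file

state_dict = load_file("last-checkpoint/adapter_model.safetensors")
print(f"{len(state_dict)} tensors in the adapter checkpoint")
for name, tensor in list(state_dict.items())[:5]:  # first few entries only
    print(name, tuple(tensor.shape), tensor.dtype)
```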
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:859b8602a8478ae50e82324137580c38819b9c9bf26b6010fcb6c1f0f0e866d6
+ oid sha256:e6cf8294ac17ea5a1d733fcc4cfdc564357fe4b651af698fb1c9e155e20e5cd8
  size 49846260
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a355d7114e76b054cea02bceff9680d87cea336f904402a275024cc0ffe7579c
+ oid sha256:386ad13fe5d816f22a0cb03c574c387eb21070a41c3bcfa2c61c742027c54d4e
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:ded87d71d8bbba78392a1f9d1a58531264c1ec39c9fa611e64bbe0bf1666151c
+ oid sha256:2fb0b702704ac2295f35bde3f325d34f67803c665d4df40762f063d3651311c1
  size 1064
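
optimizer.pt, rng_state.pth, and scheduler.pt follow the same pattern: each is an LFS-tracked blob, so the commit only rewrites its three-line pointer (spec version, sha256 oid, byte size). A minimal sketch of checking a downloaded blob against such a pointer; the helper names are hypothetical, and the example values are the new scheduler.pt oid and size from this commit:

```python
import hashlib
from pathlib import Path

def parse_lfs_pointer(text: str) -> dict:
    """Split the three 'key value' lines of a Git LFS pointer file."""
    fields = dict(line.split(" ", 1) for line in text.strip().splitlines())
    return {"oid": fields["oid"].removeprefix("sha256:"), "size": int(fields["size"])}

def matches_pointer(path: Path, pointer: dict) -> bool:
    """True if the local blob has exactly the pointer's size and sha256 digest."""
    data = path.read_bytes()
    return len(data) == pointer["size"] and hashlib.sha256(data).hexdigest() == pointer["oid"]

# New pointer contents for scheduler.pt as committed here.
pointer = parse_lfs_pointer(
    "version https://git-lfs.github.com/spec/v1\n"
    "oid sha256:2fb0b702704ac2295f35bde3f325d34f67803c665d4df40762f063d3651311c1\n"
    "size 1064\n"
)
print(matches_pointer(Path("last-checkpoint/scheduler.pt"), pointer))
```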
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.3105791006146878,
+ "epoch": 0.3170494985441605,
  "eval_steps": 386,
- "global_step": 240,
+ "global_step": 245,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -1695,6 +1695,41 @@
  "learning_rate": 9.938763759218185e-05,
  "loss": 0.9736,
  "step": 240
+ },
+ {
+ "epoch": 0.31187318020058236,
+ "grad_norm": 0.7517886161804199,
+ "learning_rate": 9.93811880731239e-05,
+ "loss": 0.8436,
+ "step": 241
+ },
+ {
+ "epoch": 0.31316725978647686,
+ "grad_norm": 0.8159210681915283,
+ "learning_rate": 9.937470497943064e-05,
+ "loss": 0.7521,
+ "step": 242
+ },
+ {
+ "epoch": 0.3144613393723714,
+ "grad_norm": 0.9554911851882935,
+ "learning_rate": 9.936818831550998e-05,
+ "loss": 1.1076,
+ "step": 243
+ },
+ {
+ "epoch": 0.3157554189582659,
+ "grad_norm": 0.8745877742767334,
+ "learning_rate": 9.936163808579266e-05,
+ "loss": 0.8908,
+ "step": 244
+ },
+ {
+ "epoch": 0.3170494985441605,
+ "grad_norm": 0.8050674200057983,
+ "learning_rate": 9.93550542947322e-05,
+ "loss": 1.0134,
+ "step": 245
  }
  ],
  "logging_steps": 1,
@@ -1714,7 +1749,7 @@
  "attributes": {}
  }
  },
- "total_flos": 2.6830492120645632e+17,
+ "total_flos": 2.7389460706492416e+17,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null