ncbateman commited on
Commit
c086ac7
·
verified ·
1 Parent(s): 8b396d0

Training in progress, step 250, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c1589e42cbae5378987344eae93a5cbeb6aaea407f7da88b4a6f4a9d93d04e89
3
  size 97307544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1281b650194f1fbbf19c77dce0688ee14dcd633838ccfe32e26ddcbfabc606a
3
  size 97307544
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e6cf8294ac17ea5a1d733fcc4cfdc564357fe4b651af698fb1c9e155e20e5cd8
3
  size 49846260
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20441cfd1a765ce19bd0c3aa3fa3f6568100e50441051ecd815972d170df6441
3
  size 49846260
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:386ad13fe5d816f22a0cb03c574c387eb21070a41c3bcfa2c61c742027c54d4e
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1242a8006a137d7d7fdf5051a8f1d3180b356281e9f9b85b1f07ab3614b81f01
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2fb0b702704ac2295f35bde3f325d34f67803c665d4df40762f063d3651311c1
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e258cdfd2c36c8c2f0b10b58594d1a291f5e6c49df7a4537b7e3a4fcef2b8c0b
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.3170494985441605,
5
  "eval_steps": 386,
6
- "global_step": 245,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1730,6 +1730,41 @@
1730
  "learning_rate": 9.93550542947322e-05,
1731
  "loss": 1.0134,
1732
  "step": 245
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1733
  }
1734
  ],
1735
  "logging_steps": 1,
@@ -1749,7 +1784,7 @@
1749
  "attributes": {}
1750
  }
1751
  },
1752
- "total_flos": 2.7389460706492416e+17,
1753
  "train_batch_size": 4,
1754
  "trial_name": null,
1755
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.3235198964736331,
5
  "eval_steps": 386,
6
+ "global_step": 250,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1730
  "learning_rate": 9.93550542947322e-05,
1731
  "loss": 1.0134,
1732
  "step": 245
1733
+ },
1734
+ {
1735
+ "epoch": 0.318343578130055,
1736
+ "grad_norm": 1.009790062904358,
1737
+ "learning_rate": 9.9348436946805e-05,
1738
+ "loss": 1.0264,
1739
+ "step": 246
1740
+ },
1741
+ {
1742
+ "epoch": 0.31963765771594954,
1743
+ "grad_norm": 0.8702448606491089,
1744
+ "learning_rate": 9.934178604651023e-05,
1745
+ "loss": 1.0067,
1746
+ "step": 247
1747
+ },
1748
+ {
1749
+ "epoch": 0.32093173730184404,
1750
+ "grad_norm": 0.8105303049087524,
1751
+ "learning_rate": 9.933510159836989e-05,
1752
+ "loss": 0.8121,
1753
+ "step": 248
1754
+ },
1755
+ {
1756
+ "epoch": 0.3222258168877386,
1757
+ "grad_norm": 0.7680085897445679,
1758
+ "learning_rate": 9.932838360692878e-05,
1759
+ "loss": 0.8951,
1760
+ "step": 249
1761
+ },
1762
+ {
1763
+ "epoch": 0.3235198964736331,
1764
+ "grad_norm": 0.8338052034378052,
1765
+ "learning_rate": 9.93216320767545e-05,
1766
+ "loss": 0.8878,
1767
+ "step": 250
1768
  }
1769
  ],
1770
  "logging_steps": 1,
 
1784
  "attributes": {}
1785
  }
1786
  },
1787
+ "total_flos": 2.79484292923392e+17,
1788
  "train_batch_size": 4,
1789
  "trial_name": null,
1790
  "trial_params": null