ncbateman committed · Commit 5eef042 · verified · 1 Parent(s): 708a59e

Training in progress, step 260, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4abad106b26a177c80a55998c565b87e8d8e322882f0ee0d82dfa1bc33ce78be
+oid sha256:62dd58f5f07e9cfe5165e4ee91690a196110eda1989f229f57e089616cbea092
 size 97307544
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:15391496ce979fc31bfa4b89cf7a1b8ef5dc88ae859e2b82b3f5c1003f85f097
-size 49846260
+oid sha256:c6e698cb8a9f1b2feee9bca00e0a141b2948dbdee0009ec8f7c9aeccb939cad8
+size 49846644
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:92f080d6a96737273ff3f2c3aa9a0b978c372496e285abadb5ac87a0c88bd369
+oid sha256:0ab3419023c7faeaec921cfbb345173fbd2d7548f26f89a44a67a4f452b92d5d
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:727d568946636c509efe7945204da61e120d33ba9eb30256473171e8dfb29ed3
+oid sha256:e1b8e5ce2718054463127476242d5f2ee90d1229b9c9ce677374004099bb2fb5
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.3299902944031058,
+  "epoch": 0.33646069233257847,
   "eval_steps": 386,
-  "global_step": 255,
+  "global_step": 260,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1800,6 +1800,41 @@
       "learning_rate": 9.92873715063483e-05,
       "loss": 0.9571,
       "step": 255
+    },
+    {
+      "epoch": 0.33128437398900035,
+      "grad_norm": 1.0638468265533447,
+      "learning_rate": 9.92804188409717e-05,
+      "loss": 1.0293,
+      "step": 256
+    },
+    {
+      "epoch": 0.33257845357489485,
+      "grad_norm": 0.7083877325057983,
+      "learning_rate": 9.927343266947356e-05,
+      "loss": 0.875,
+      "step": 257
+    },
+    {
+      "epoch": 0.3338725331607894,
+      "grad_norm": 0.7915517091751099,
+      "learning_rate": 9.92664129966038e-05,
+      "loss": 0.8848,
+      "step": 258
+    },
+    {
+      "epoch": 0.3351666127466839,
+      "grad_norm": 0.8054295182228088,
+      "learning_rate": 9.925935982713518e-05,
+      "loss": 0.8981,
+      "step": 259
+    },
+    {
+      "epoch": 0.33646069233257847,
+      "grad_norm": 1.012574553489685,
+      "learning_rate": 9.925227316586316e-05,
+      "loss": 0.8119,
+      "step": 260
     }
   ],
   "logging_steps": 1,
@@ -1819,7 +1854,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.8507397878185984e+17,
+  "total_flos": 2.906636646403277e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null