ncbateman commited on
Commit
1cedcec
·
verified ·
1 Parent(s): 46d8cd7

Training in progress, step 265, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:62dd58f5f07e9cfe5165e4ee91690a196110eda1989f229f57e089616cbea092
3
  size 97307544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3e35886bdcf00795dc56e4c37d7f8b9156e3af207c4f5197e9b99f856611064
3
  size 97307544
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c6e698cb8a9f1b2feee9bca00e0a141b2948dbdee0009ec8f7c9aeccb939cad8
3
  size 49846644
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e666486e8cc62b97d906ea52ffc730c72ad81956be23d6dde2d1167414d1fd1
3
  size 49846644
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0ab3419023c7faeaec921cfbb345173fbd2d7548f26f89a44a67a4f452b92d5d
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db7e8cfd6e7b1242076647967dbc1bd8afa97abb68b4228e97c7e65538d0340d
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e1b8e5ce2718054463127476242d5f2ee90d1229b9c9ce677374004099bb2fb5
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e07629d02aee1dc6fe60d67e7c3e731bc344bba13e8c3da66188957923131f6
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.33646069233257847,
5
  "eval_steps": 386,
6
- "global_step": 260,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1835,6 +1835,41 @@
1835
  "learning_rate": 9.925227316586316e-05,
1836
  "loss": 0.8119,
1837
  "step": 260
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1838
  }
1839
  ],
1840
  "logging_steps": 1,
@@ -1854,7 +1889,7 @@
1854
  "attributes": {}
1855
  }
1856
  },
1857
- "total_flos": 2.906636646403277e+17,
1858
  "train_batch_size": 4,
1859
  "trial_name": null,
1860
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.3429310902620511,
5
  "eval_steps": 386,
6
+ "global_step": 265,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1835
  "learning_rate": 9.925227316586316e-05,
1836
  "loss": 0.8119,
1837
  "step": 260
1838
+ },
1839
+ {
1840
+ "epoch": 0.337754771918473,
1841
+ "grad_norm": 0.8329979181289673,
1842
+ "learning_rate": 9.924515301760606e-05,
1843
+ "loss": 0.8467,
1844
+ "step": 261
1845
+ },
1846
+ {
1847
+ "epoch": 0.33904885150436753,
1848
+ "grad_norm": 0.801017701625824,
1849
+ "learning_rate": 9.923799938720488e-05,
1850
+ "loss": 0.8333,
1851
+ "step": 262
1852
+ },
1853
+ {
1854
+ "epoch": 0.34034293109026204,
1855
+ "grad_norm": 0.9083892703056335,
1856
+ "learning_rate": 9.923081227952347e-05,
1857
+ "loss": 0.8727,
1858
+ "step": 263
1859
+ },
1860
+ {
1861
+ "epoch": 0.3416370106761566,
1862
+ "grad_norm": 0.7917154431343079,
1863
+ "learning_rate": 9.922359169944834e-05,
1864
+ "loss": 1.0341,
1865
+ "step": 264
1866
+ },
1867
+ {
1868
+ "epoch": 0.3429310902620511,
1869
+ "grad_norm": 0.6865798234939575,
1870
+ "learning_rate": 9.921633765188886e-05,
1871
+ "loss": 0.9117,
1872
+ "step": 265
1873
  }
1874
  ],
1875
  "logging_steps": 1,
 
1889
  "attributes": {}
1890
  }
1891
  },
1892
+ "total_flos": 2.962533504987955e+17,
1893
  "train_batch_size": 4,
1894
  "trial_name": null,
1895
  "trial_params": null