bhuvanmdev committed on
Commit
e1a2b04
1 Parent(s): d325efa

Training in progress, step 2220, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fbc4b1ee61a97280ee29a7f03d65f0615973d396283c0c1aa8a9c199445811fe
3
  size 100697728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe4cb03d3561140a6dc960340bfc20239d79b812171f8665033b7d5950060c16
3
  size 100697728
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:012e45346ca36f5757826a56705c7bc60bbc32817df65a5df6cb0bf846ce5a5f
3
  size 201541754
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02194e64ab0e46b57c09b90a41edf3c57c35eb6a761d021610ca25fe7722be96
3
  size 201541754
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b7cee742af04b6dcd02cfc87def2192f9af2a689dec2ee074b90937e1adbaf4e
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2417621c0c251f9779f9e28a0066e04b9dea3b724de678a9d2863d36bd10fb07
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d19068f9fb75a389d67a2a6a34b0327497f2ac8cc34c17114479ba846240d9b4
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dcda9d1b5e596e9a16f8101054c6161f73e1500ea7e6c07e8c2aa6c3a698c4fe
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.7871198568872988,
5
  "eval_steps": 500,
6
- "global_step": 2200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1767,14 +1767,30 @@
1767
  "loss": 0.4195,
1768
  "num_input_tokens_seen": 1488275,
1769
  "step": 2200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1770
  }
1771
  ],
1772
  "logging_steps": 10,
1773
  "max_steps": 2795,
1774
- "num_input_tokens_seen": 1488275,
1775
  "num_train_epochs": 1,
1776
  "save_steps": 20,
1777
- "total_flos": 3.34660490675712e+16,
1778
  "train_batch_size": 1,
1779
  "trial_name": null,
1780
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.7942754919499105,
5
  "eval_steps": 500,
6
+ "global_step": 2220,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1767
  "loss": 0.4195,
1768
  "num_input_tokens_seen": 1488275,
1769
  "step": 2200
1770
+ },
1771
+ {
1772
+ "epoch": 0.7906976744186046,
1773
+ "grad_norm": 0.31744202971458435,
1774
+ "learning_rate": 4.186046511627907e-05,
1775
+ "loss": 0.3672,
1776
+ "num_input_tokens_seen": 1496031,
1777
+ "step": 2210
1778
+ },
1779
+ {
1780
+ "epoch": 0.7942754919499105,
1781
+ "grad_norm": 0.3008958697319031,
1782
+ "learning_rate": 4.114490161001789e-05,
1783
+ "loss": 0.3809,
1784
+ "num_input_tokens_seen": 1503138,
1785
+ "step": 2220
1786
  }
1787
  ],
1788
  "logging_steps": 10,
1789
  "max_steps": 2795,
1790
+ "num_input_tokens_seen": 1503138,
1791
  "num_train_epochs": 1,
1792
  "save_steps": 20,
1793
+ "total_flos": 3.3800265450491904e+16,
1794
  "train_batch_size": 1,
1795
  "trial_name": null,
1796
  "trial_params": null