bhuvanmdev committed on
Commit
7d46522
1 Parent(s): 4cc500a

Training in progress, step 2340, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ff6279bb72a1a41fb48956b4feb1a93393d18d4e6392ad78211fc04c9f099df2
3
  size 100697728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8abb44f6aca403f71064a572c838010e4fcf7984d9faf66f884911976f427e2c
3
  size 100697728
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:74ccf80491212ab3533b791bb0a94ac69d6d47751d6deeebce7061fdefd9b30f
3
  size 201541754
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5421b6835db3269773718c76f42b20f1029476f55863aace65468871b75acead
3
  size 201541754
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:19bb1874a3bf4e8cd76c40201bfecc47f7333e535e927638bb37015a23fd28aa
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2906515e1bcbf65ec65f30890fb86fc07abf4f380736f4d75d70f44ddd1a161
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:830ffa36a2cc85b79b79c8f68dead7aaec7fb58f5ebad1169970ef47fdf22b5f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0edca768de2b612c93b0f82e9eb2540d009ac62af5eb1b92baccfef4a526aeb
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.8300536672629696,
5
  "eval_steps": 500,
6
- "global_step": 2320,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1863,14 +1863,30 @@
1863
  "loss": 0.3971,
1864
  "num_input_tokens_seen": 1572070,
1865
  "step": 2320
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1866
  }
1867
  ],
1868
  "logging_steps": 10,
1869
  "max_steps": 2795,
1870
- "num_input_tokens_seen": 1572070,
1871
  "num_train_epochs": 1,
1872
  "save_steps": 20,
1873
- "total_flos": 3.535030270457856e+16,
1874
  "train_batch_size": 1,
1875
  "trial_name": null,
1876
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.8372093023255814,
5
  "eval_steps": 500,
6
+ "global_step": 2340,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1863
  "loss": 0.3971,
1864
  "num_input_tokens_seen": 1572070,
1865
  "step": 2320
1866
+ },
1867
+ {
1868
+ "epoch": 0.8336314847942755,
1869
+ "grad_norm": 0.3011910021305084,
1870
+ "learning_rate": 3.3273703041144904e-05,
1871
+ "loss": 0.4008,
1872
+ "num_input_tokens_seen": 1579014,
1873
+ "step": 2330
1874
+ },
1875
+ {
1876
+ "epoch": 0.8372093023255814,
1877
+ "grad_norm": 0.3195567727088928,
1878
+ "learning_rate": 3.2558139534883724e-05,
1879
+ "loss": 0.362,
1880
+ "num_input_tokens_seen": 1586863,
1881
+ "step": 2340
1882
  }
1883
  ],
1884
  "logging_steps": 10,
1885
  "max_steps": 2795,
1886
+ "num_input_tokens_seen": 1586863,
1887
  "num_train_epochs": 1,
1888
  "save_steps": 20,
1889
+ "total_flos": 3.5682945034696704e+16,
1890
  "train_batch_size": 1,
1891
  "trial_name": null,
1892
  "trial_params": null