bhuvanmdev commited on
Commit
ff8c332
1 Parent(s): 91b39b7

Training in progress, step 2020, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b5f86dd9371b781155ba643a548b4de3e51acb102f7bc37f5826e1d50947b2f0
3
  size 100697728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7b6655782a983e665dba0e849d536995a49fc28327e1a340c6d0acb3e6a9346
3
  size 100697728
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:04e3e81d18fadc24063e623e9d4cc0d12eb575ca5424a5a8f3ebf2648240568b
3
  size 201541754
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7bf5ba4c5bdffdca097a4cab622da35d4625e043ff5ff81b133e63498d06dbfc
3
  size 201541754
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2191a9ab0ab41e50985488533d64dae12c867360e85f064297a87e2f978e6536
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c676bb33648ee9263046f9d978c9dc81390c0ac42995b13679bcd936e804701
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:16253ca7555b24e595f70e545999ce4ef3d7193309d16f3cd6b13443ca20da3e
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1288e563fd14386ce1f1f209b36f861a91ceda68715fc12dfb92acd5d04d997
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.7155635062611807,
5
  "eval_steps": 500,
6
- "global_step": 2000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1607,14 +1607,30 @@
1607
  "loss": 0.4041,
1608
  "num_input_tokens_seen": 1352944,
1609
  "step": 2000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1610
  }
1611
  ],
1612
  "logging_steps": 10,
1613
  "max_steps": 2795,
1614
- "num_input_tokens_seen": 1352944,
1615
  "num_train_epochs": 1,
1616
  "save_steps": 20,
1617
- "total_flos": 3.042293278438195e+16,
1618
  "train_batch_size": 1,
1619
  "trial_name": null,
1620
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.7227191413237924,
5
  "eval_steps": 500,
6
+ "global_step": 2020,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1607
  "loss": 0.4041,
1608
  "num_input_tokens_seen": 1352944,
1609
  "step": 2000
1610
+ },
1611
+ {
1612
+ "epoch": 0.7191413237924866,
1613
+ "grad_norm": 0.3060110807418823,
1614
+ "learning_rate": 5.6171735241502685e-05,
1615
+ "loss": 0.3943,
1616
+ "num_input_tokens_seen": 1359671,
1617
+ "step": 2010
1618
+ },
1619
+ {
1620
+ "epoch": 0.7227191413237924,
1621
+ "grad_norm": 0.42584308981895447,
1622
+ "learning_rate": 5.5456171735241505e-05,
1623
+ "loss": 0.3982,
1624
+ "num_input_tokens_seen": 1366487,
1625
+ "step": 2020
1626
  }
1627
  ],
1628
  "logging_steps": 10,
1629
  "max_steps": 2795,
1630
+ "num_input_tokens_seen": 1366487,
1631
  "num_train_epochs": 1,
1632
  "save_steps": 20,
1633
+ "total_flos": 3.0727467028740096e+16,
1634
  "train_batch_size": 1,
1635
  "trial_name": null,
1636
  "trial_params": null