bhuvanmdev commited on
Commit
1eff942
1 Parent(s): b0e50b7

Training in progress, step 1060, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:741f1e82263544b2af2ddd5880bd1151acdef154443223e1af5fa74c84fa4c5b
3
  size 100697728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83db771df950e62ad2023cec1215f51219493e18859b93162135ea811647b4ee
3
  size 100697728
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c5e875a68f7de1135ec7eff6679cca0153967f574ae2078888d36849fcc7fa49
3
  size 201541754
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6b7dbda1b23eed1009be33bc92a27f23738750bb760895a3877eb8413e63752
3
  size 201541754
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:09e6b49e9c75de37719a5ce5ce7ac22edb82ba14494ff3f8378eb2a2213d04a5
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7097ad8e44eb73dccc917ee5a890bd970cf5e65b02ff96ff5daa468616d769c6
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8540a59a904db0631e6ba6215ec79696a37252ba8141132429ae3f3eee3075b9
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2ed300d9a9b5101b0a51972c41bba244062680bd71a8f222001fe6d7c1bf120
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.37209302325581395,
5
  "eval_steps": 500,
6
- "global_step": 1040,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -839,14 +839,30 @@
839
  "loss": 0.4253,
840
  "num_input_tokens_seen": 695760,
841
  "step": 1040
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
842
  }
843
  ],
844
  "logging_steps": 10,
845
  "max_steps": 2795,
846
- "num_input_tokens_seen": 695760,
847
  "num_train_epochs": 1,
848
  "save_steps": 20,
849
- "total_flos": 1.564518539870208e+16,
850
  "train_batch_size": 1,
851
  "trial_name": null,
852
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.37924865831842575,
5
  "eval_steps": 500,
6
+ "global_step": 1060,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
839
  "loss": 0.4253,
840
  "num_input_tokens_seen": 695760,
841
  "step": 1040
842
+ },
843
+ {
844
+ "epoch": 0.3756708407871199,
845
+ "grad_norm": 0.26929447054862976,
846
+ "learning_rate": 0.00012486583184257604,
847
+ "loss": 0.405,
848
+ "num_input_tokens_seen": 702561,
849
+ "step": 1050
850
+ },
851
+ {
852
+ "epoch": 0.37924865831842575,
853
+ "grad_norm": 0.24823708832263947,
854
+ "learning_rate": 0.00012415026833631485,
855
+ "loss": 0.4317,
856
+ "num_input_tokens_seen": 710659,
857
+ "step": 1060
858
  }
859
  ],
860
  "logging_steps": 10,
861
  "max_steps": 2795,
862
+ "num_input_tokens_seen": 710659,
863
  "num_train_epochs": 1,
864
  "save_steps": 20,
865
+ "total_flos": 1.5980211294492672e+16,
866
  "train_batch_size": 1,
867
  "trial_name": null,
868
  "trial_params": null