ncbateman commited on
Commit
02cb7a9
·
verified ·
1 Parent(s): 9648956

Training in progress, step 385, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:158ccb26f9051a1ce9a47cd393a551e5dce6c352732882a27c1b88d8d4e2f682
3
  size 97307544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ad5bb6a07b5358dccb40a92e32494631d5f1c697661e9c9628e3e1b798cad32
3
  size 97307544
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fa40f3c08f8aa8762162a52339fec13192340a007a872adbd2a0699f5af2f111
3
  size 49846644
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4f1b6dde27d8af2c96a8d1851af11d6c5644f5a3c144dfa41d9597eeffc1f83
3
  size 49846644
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:37239226899f264c9f793826b646f1103d28fdf8250bd2a9877ca22fe4056168
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bcc07ce19b7fd4f90004a8e11f50490415cba07804798230ae98a4519365742a
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:39411f8be444761c5218c2d62e475489e6eb0154d2c02b4d166b577f908343d6
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51fbcb00d7a67643a5230f4ba48fcd85e7a8215e6d43c6acb6e42d34667255bc
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.49175024263992234,
5
  "eval_steps": 386,
6
- "global_step": 380,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2675,6 +2675,41 @@
2675
  "learning_rate": 9.816034510373286e-05,
2676
  "loss": 0.9889,
2677
  "step": 380
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2678
  }
2679
  ],
2680
  "logging_steps": 1,
@@ -2694,7 +2729,7 @@
2694
  "attributes": {}
2695
  }
2696
  },
2697
- "total_flos": 4.2481612524355584e+17,
2698
  "train_batch_size": 4,
2699
  "trial_name": null,
2700
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.498220640569395,
5
  "eval_steps": 386,
6
+ "global_step": 385,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2675
  "learning_rate": 9.816034510373286e-05,
2676
  "loss": 0.9889,
2677
  "step": 380
2678
+ },
2679
+ {
2680
+ "epoch": 0.4930443222258169,
2681
+ "grad_norm": 0.829309344291687,
2682
+ "learning_rate": 9.814924818785865e-05,
2683
+ "loss": 0.8139,
2684
+ "step": 381
2685
+ },
2686
+ {
2687
+ "epoch": 0.4943384018117114,
2688
+ "grad_norm": 0.83943110704422,
2689
+ "learning_rate": 9.81381185349542e-05,
2690
+ "loss": 0.9202,
2691
+ "step": 382
2692
+ },
2693
+ {
2694
+ "epoch": 0.49563248139760596,
2695
+ "grad_norm": 0.7981933355331421,
2696
+ "learning_rate": 9.812695615258662e-05,
2697
+ "loss": 0.9131,
2698
+ "step": 383
2699
+ },
2700
+ {
2701
+ "epoch": 0.49692656098350046,
2702
+ "grad_norm": 0.7930905818939209,
2703
+ "learning_rate": 9.81157610483453e-05,
2704
+ "loss": 0.769,
2705
+ "step": 384
2706
+ },
2707
+ {
2708
+ "epoch": 0.498220640569395,
2709
+ "grad_norm": 0.8699679970741272,
2710
+ "learning_rate": 9.81045332298419e-05,
2711
+ "loss": 0.9468,
2712
+ "step": 385
2713
  }
2714
  ],
2715
  "logging_steps": 1,
 
2729
  "attributes": {}
2730
  }
2731
  },
2732
+ "total_flos": 4.304058111020237e+17,
2733
  "train_batch_size": 4,
2734
  "trial_name": null,
2735
  "trial_params": null