ncbateman committed on
Commit
cfa1c63
·
verified ·
1 Parent(s): 4ecd0f2

Training in progress, step 395, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b63a9540fa7412ca48380046bfa8cfe6ba36c7a222a8f8ddd5253942cfdf523a
3
  size 97307544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aebe659d2f76c3b6d0004991e20f559679d6cf72d52e6cf39d03f3a1993703e4
3
  size 97307544
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:76f198036b0c9f1dbf756dfc785905c4dedca2e73a89d2c7e455140f93d15c9a
3
  size 49846644
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae61259f2e63dcfe7725038bb24c483967928f6ef0d982a8f8cd2b593a116162
3
  size 49846644
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:62f9db20d138e666739444e689769719e8c52e45b26e4605c90eb0139d43c213
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba118175917d0b3967d192cc434ab0812fa81db5e2563d542f010b46a09a3fbb
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:85cca89790b4363b0104a6549c66e609ee25936e2ae1c6e763e441ae3b5c53ff
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6bf5e612f4fd4a32693b4794e80a1434e64b43319b927beb253a75b7d3b07a4
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.5046910384988677,
5
  "eval_steps": 386,
6
- "global_step": 390,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2753,6 +2753,41 @@
2753
  "learning_rate": 9.804790369135718e-05,
2754
  "loss": 0.8657,
2755
  "step": 390
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2756
  }
2757
  ],
2758
  "logging_steps": 1,
@@ -2772,7 +2807,7 @@
2772
  "attributes": {}
2773
  }
2774
  },
2775
- "total_flos": 4.359954969604915e+17,
2776
  "train_batch_size": 4,
2777
  "trial_name": null,
2778
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.5111614364283403,
5
  "eval_steps": 386,
6
+ "global_step": 395,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2753
  "learning_rate": 9.804790369135718e-05,
2754
  "loss": 0.8657,
2755
  "step": 390
2756
+ },
2757
+ {
2758
+ "epoch": 0.5059851180847622,
2759
+ "grad_norm": 0.8373358845710754,
2760
+ "learning_rate": 9.80364797483707e-05,
2761
+ "loss": 0.9175,
2762
+ "step": 391
2763
+ },
2764
+ {
2765
+ "epoch": 0.5072791976706568,
2766
+ "grad_norm": 0.8288902640342712,
2767
+ "learning_rate": 9.802502314502607e-05,
2768
+ "loss": 0.7463,
2769
+ "step": 392
2770
+ },
2771
+ {
2772
+ "epoch": 0.5085732772565513,
2773
+ "grad_norm": 0.6780114769935608,
2774
+ "learning_rate": 9.801353388911269e-05,
2775
+ "loss": 0.7973,
2776
+ "step": 393
2777
+ },
2778
+ {
2779
+ "epoch": 0.5098673568424458,
2780
+ "grad_norm": 0.9328367710113525,
2781
+ "learning_rate": 9.800201198844221e-05,
2782
+ "loss": 1.0405,
2783
+ "step": 394
2784
+ },
2785
+ {
2786
+ "epoch": 0.5111614364283403,
2787
+ "grad_norm": 0.9010327458381653,
2788
+ "learning_rate": 9.799045745084847e-05,
2789
+ "loss": 1.1194,
2790
+ "step": 395
2791
  }
2792
  ],
2793
  "logging_steps": 1,
 
2807
  "attributes": {}
2808
  }
2809
  },
2810
+ "total_flos": 4.4158518281895936e+17,
2811
  "train_batch_size": 4,
2812
  "trial_name": null,
2813
  "trial_params": null