ncbateman commited on
Commit
9384bc5
·
verified ·
1 Parent(s): 479a054

Training in progress, step 30, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8e681c7412ef2557f51f437204f7dbbccb418dccefea52569f201f7a05c61feb
3
  size 97307544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7c25e36ef1bea88e955b04a392995659ce2efa2958e9a824de856926f2f78b2
3
  size 97307544
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:196d52d70a6910ff8c67869e7cbbe76c44d57cc489019ff6d2baf29ba21ca21c
3
  size 49846260
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc69a4cb3de67ebaa3a95161b888f3e6a62143841950ec7d93681c428ce896bf
3
  size 49846260
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6bec6c83fbb7d43296cc5ab0e300576282a47308ba5787731efc1f099f27e291
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e8303210a87e6366e53f9d2ad1dc5984114aa017ddbff7d118553d8efe51202
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:814b79b47e12bde76b22a1ac4fd2f1e7ddf84c332d0b19c3538a1fad1c6cc96e
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a73e1ff9beffc13aa54f4adf4df9ed4ad8819cc503c53ddfd100ef74e91d520
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.03235198964736331,
5
  "eval_steps": 386,
6
- "global_step": 25,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -190,6 +190,41 @@
190
  "learning_rate": 5e-05,
191
  "loss": 1.4606,
192
  "step": 25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
193
  }
194
  ],
195
  "logging_steps": 1,
@@ -209,7 +244,7 @@
209
  "attributes": {}
210
  }
211
  },
212
- "total_flos": 2.79484292923392e+16,
213
  "train_batch_size": 4,
214
  "trial_name": null,
215
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.038822387576835975,
5
  "eval_steps": 386,
6
+ "global_step": 30,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
190
  "learning_rate": 5e-05,
191
  "loss": 1.4606,
192
  "step": 25
193
+ },
194
+ {
195
+ "epoch": 0.033646069233257844,
196
+ "grad_norm": 1.5480064153671265,
197
+ "learning_rate": 5.2000000000000004e-05,
198
+ "loss": 1.5027,
199
+ "step": 26
200
+ },
201
+ {
202
+ "epoch": 0.034940148819152375,
203
+ "grad_norm": 1.6736445426940918,
204
+ "learning_rate": 5.4000000000000005e-05,
205
+ "loss": 1.2426,
206
+ "step": 27
207
+ },
208
+ {
209
+ "epoch": 0.03623422840504691,
210
+ "grad_norm": 1.7392551898956299,
211
+ "learning_rate": 5.6000000000000006e-05,
212
+ "loss": 1.4703,
213
+ "step": 28
214
+ },
215
+ {
216
+ "epoch": 0.037528307990941444,
217
+ "grad_norm": 1.6173359155654907,
218
+ "learning_rate": 5.8e-05,
219
+ "loss": 1.4546,
220
+ "step": 29
221
+ },
222
+ {
223
+ "epoch": 0.038822387576835975,
224
+ "grad_norm": 1.3955802917480469,
225
+ "learning_rate": 6e-05,
226
+ "loss": 1.3808,
227
+ "step": 30
228
  }
229
  ],
230
  "logging_steps": 1,
 
244
  "attributes": {}
245
  }
246
  },
247
+ "total_flos": 3.353811515080704e+16,
248
  "train_batch_size": 4,
249
  "trial_name": null,
250
  "trial_params": null