ncbateman commited on
Commit
2c5dc00
·
verified ·
1 Parent(s): aef014d

Training in progress, step 35, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e7c25e36ef1bea88e955b04a392995659ce2efa2958e9a824de856926f2f78b2
3
  size 97307544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a0904a44c1929f97e8bd4e5c46a96a8a3044de6f935bcf5630acdb0cdb6d739
3
  size 97307544
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cc69a4cb3de67ebaa3a95161b888f3e6a62143841950ec7d93681c428ce896bf
3
  size 49846260
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ef6f282d85d8e63c53b00534e682ea5282bac1edb49ab13b1b78e144354038b
3
  size 49846260
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5e8303210a87e6366e53f9d2ad1dc5984114aa017ddbff7d118553d8efe51202
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e24fd0a4708a45e9b32be5aab9d4589ac1e498dcf2ba55e9e776e2a6e66e9b62
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4a73e1ff9beffc13aa54f4adf4df9ed4ad8819cc503c53ddfd100ef74e91d520
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a97edf5ef9280d040e46685cc4e47c24383c42a51949ff834379ab1766a8b0a
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.038822387576835975,
5
  "eval_steps": 386,
6
- "global_step": 30,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -225,6 +225,41 @@
225
  "learning_rate": 6e-05,
226
  "loss": 1.3808,
227
  "step": 30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
228
  }
229
  ],
230
  "logging_steps": 1,
@@ -244,7 +279,7 @@
244
  "attributes": {}
245
  }
246
  },
247
- "total_flos": 3.353811515080704e+16,
248
  "train_batch_size": 4,
249
  "trial_name": null,
250
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.045292785506308636,
5
  "eval_steps": 386,
6
+ "global_step": 35,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
225
  "learning_rate": 6e-05,
226
  "loss": 1.3808,
227
  "step": 30
228
+ },
229
+ {
230
+ "epoch": 0.040116467162730506,
231
+ "grad_norm": 1.353873372077942,
232
+ "learning_rate": 6.2e-05,
233
+ "loss": 1.229,
234
+ "step": 31
235
+ },
236
+ {
237
+ "epoch": 0.04141054674862504,
238
+ "grad_norm": 1.2547746896743774,
239
+ "learning_rate": 6.400000000000001e-05,
240
+ "loss": 1.1668,
241
+ "step": 32
242
+ },
243
+ {
244
+ "epoch": 0.042704626334519574,
245
+ "grad_norm": 1.3806778192520142,
246
+ "learning_rate": 6.6e-05,
247
+ "loss": 1.0691,
248
+ "step": 33
249
+ },
250
+ {
251
+ "epoch": 0.043998705920414105,
252
+ "grad_norm": 1.2815773487091064,
253
+ "learning_rate": 6.800000000000001e-05,
254
+ "loss": 1.2409,
255
+ "step": 34
256
+ },
257
+ {
258
+ "epoch": 0.045292785506308636,
259
+ "grad_norm": 1.3677266836166382,
260
+ "learning_rate": 7e-05,
261
+ "loss": 0.9668,
262
+ "step": 35
263
  }
264
  ],
265
  "logging_steps": 1,
 
279
  "attributes": {}
280
  }
281
  },
282
+ "total_flos": 3.912780100927488e+16,
283
  "train_batch_size": 4,
284
  "trial_name": null,
285
  "trial_params": null