ncbateman commited on
Commit
3bede70
1 Parent(s): 9ce31e4

Training in progress, step 40, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8a0904a44c1929f97e8bd4e5c46a96a8a3044de6f935bcf5630acdb0cdb6d739
3
  size 97307544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:753c7f048d3c17a8dff05f7e48fa6cf023fc614649bb13ef4db1a60157f75a09
3
  size 97307544
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2ef6f282d85d8e63c53b00534e682ea5282bac1edb49ab13b1b78e144354038b
3
  size 49846260
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9012b11cde3a2f0c54dbc6f706596bd3a5f30e4c957cf2683a03dee748e9f767
3
  size 49846260
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e24fd0a4708a45e9b32be5aab9d4589ac1e498dcf2ba55e9e776e2a6e66e9b62
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6c5ab014d4aef79b98314290c49544dc6945723046a24993974bca5a70c4fbb
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2a97edf5ef9280d040e46685cc4e47c24383c42a51949ff834379ab1766a8b0a
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b1e5be422da84e599b3b273b39d45d7ca20a6f0b0460857ee0c0fe6a229a2c9
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.045292785506308636,
5
  "eval_steps": 386,
6
- "global_step": 35,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -260,6 +260,41 @@
260
  "learning_rate": 7e-05,
261
  "loss": 0.9668,
262
  "step": 35
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
263
  }
264
  ],
265
  "logging_steps": 1,
@@ -279,7 +314,7 @@
279
  "attributes": {}
280
  }
281
  },
282
- "total_flos": 3.912780100927488e+16,
283
  "train_batch_size": 4,
284
  "trial_name": null,
285
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.0517631834357813,
5
  "eval_steps": 386,
6
+ "global_step": 40,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
260
  "learning_rate": 7e-05,
261
  "loss": 0.9668,
262
  "step": 35
263
+ },
264
+ {
265
+ "epoch": 0.046586865092203174,
266
+ "grad_norm": 1.5457032918930054,
267
+ "learning_rate": 7.2e-05,
268
+ "loss": 1.1385,
269
+ "step": 36
270
+ },
271
+ {
272
+ "epoch": 0.047880944678097705,
273
+ "grad_norm": 1.5587060451507568,
274
+ "learning_rate": 7.4e-05,
275
+ "loss": 1.1707,
276
+ "step": 37
277
+ },
278
+ {
279
+ "epoch": 0.049175024263992236,
280
+ "grad_norm": 1.079053282737732,
281
+ "learning_rate": 7.6e-05,
282
+ "loss": 1.0655,
283
+ "step": 38
284
+ },
285
+ {
286
+ "epoch": 0.050469103849886766,
287
+ "grad_norm": 1.1773897409439087,
288
+ "learning_rate": 7.800000000000001e-05,
289
+ "loss": 1.0465,
290
+ "step": 39
291
+ },
292
+ {
293
+ "epoch": 0.0517631834357813,
294
+ "grad_norm": 1.2437673807144165,
295
+ "learning_rate": 8e-05,
296
+ "loss": 1.2779,
297
+ "step": 40
298
  }
299
  ],
300
  "logging_steps": 1,
 
314
  "attributes": {}
315
  }
316
  },
317
+ "total_flos": 4.471748686774272e+16,
318
  "train_batch_size": 4,
319
  "trial_name": null,
320
  "trial_params": null