ncbateman committed
Commit d1e874e
1 Parent(s): c928fc0

Training in progress, step 45, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:753c7f048d3c17a8dff05f7e48fa6cf023fc614649bb13ef4db1a60157f75a09
+oid sha256:bbef157b9600fdc30cd09681e635c3596d9d9f3a832f0d58792888176093d6e7
 size 97307544
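
adapter_model.safetensors presumably stores the adapter (e.g. LoRA) weights in safetensors format; that reading is inferred from the filename, not stated in the commit. A minimal Python sketch, assuming the file has been pulled from LFS to the relative path below and the safetensors package is installed, of listing the tensors it contains:

# Sketch: list a few tensors from the updated adapter file.
# The relative path is an assumption; adjust it to wherever the file was downloaded.
from safetensors.torch import load_file

adapter = load_file("last-checkpoint/adapter_model.safetensors")
for name, tensor in list(adapter.items())[:5]:
    print(name, tuple(tensor.shape), tensor.dtype)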
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9012b11cde3a2f0c54dbc6f706596bd3a5f30e4c957cf2683a03dee748e9f767
+oid sha256:16278e58faf54743e51c2cb86c8c7a4cef307e814c0ff1c955065184c7b3ffbf
 size 49846260
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e6c5ab014d4aef79b98314290c49544dc6945723046a24993974bca5a70c4fbb
+oid sha256:1d0fa37673dbc0bf8164a593d75abf6e8c224c2ea3193e0f86621fb2aa8c6ed6
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7b1e5be422da84e599b3b273b39d45d7ca20a6f0b0460857ee0c0fe6a229a2c9
+oid sha256:cd1931e4016bf28b07346e79413c71e240f12f43909f7431de607b5c05407707
 size 1064
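
The four files above are Git LFS pointers rather than the binaries themselves: each records only the SHA-256 digest (oid) and the byte size of the object in LFS storage, which is why this commit swaps digests while the sizes stay the same. A minimal sketch, assuming the checkpoint has actually been pulled locally, of checking a downloaded artifact against its pointer fields:

# Sketch: verify a pulled LFS object against the oid/size recorded in its pointer.
# The path and expected values below come from the adapter_model.safetensors diff above.
import hashlib
from pathlib import Path

def matches_pointer(path, expected_oid, expected_size):
    data = Path(path).read_bytes()
    return hashlib.sha256(data).hexdigest() == expected_oid and len(data) == expected_size

print(matches_pointer(
    "last-checkpoint/adapter_model.safetensors",
    "bbef157b9600fdc30cd09681e635c3596d9d9f3a832f0d58792888176093d6e7",
    97307544,
))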
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.0517631834357813,
+  "epoch": 0.058233581365253966,
   "eval_steps": 386,
-  "global_step": 40,
+  "global_step": 45,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -295,6 +295,41 @@
       "learning_rate": 8e-05,
       "loss": 1.2779,
       "step": 40
+    },
+    {
+      "epoch": 0.053057263021675835,
+      "grad_norm": 1.254847526550293,
+      "learning_rate": 8.2e-05,
+      "loss": 1.0898,
+      "step": 41
+    },
+    {
+      "epoch": 0.054351342607570366,
+      "grad_norm": 1.1771515607833862,
+      "learning_rate": 8.4e-05,
+      "loss": 1.1827,
+      "step": 42
+    },
+    {
+      "epoch": 0.0556454221934649,
+      "grad_norm": 1.1400648355484009,
+      "learning_rate": 8.6e-05,
+      "loss": 1.1066,
+      "step": 43
+    },
+    {
+      "epoch": 0.05693950177935943,
+      "grad_norm": 1.2047138214111328,
+      "learning_rate": 8.800000000000001e-05,
+      "loss": 0.8974,
+      "step": 44
+    },
+    {
+      "epoch": 0.058233581365253966,
+      "grad_norm": 1.1269346475601196,
+      "learning_rate": 9e-05,
+      "loss": 1.0146,
+      "step": 45
     }
   ],
   "logging_steps": 1,
@@ -314,7 +349,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4.471748686774272e+16,
+  "total_flos": 5.030717272621056e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null