ncbateman committed on
Commit
daad29a
·
verified ·
1 Parent(s): 8307f73

Training in progress, step 485, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c7021ae9619a321c4fe6163d1c096061cc13b61a48ce860e6b0f7790a0de1a45
3
  size 97307544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47a6b59d01849a163702b8e50607c498745112dc6ced09ce72986bc17ec447c5
3
  size 97307544
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0b4fc0e2d4071b340f24f1590dacac63838b744b2dd6aa10c6c78993a30b8bf5
3
  size 49846644
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c943a007ca820031ad467134581d9a1d6d566dbef8d77e1cbb5e854f9dbe6f0
3
  size 49846644
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:032f2d26c59f5362e1f97b89368915c6408089f13037766115fb561ba9953a00
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2c2f577326b2b04f054023f26d112b121ac8ff4bf7c93ae38255948cc3a2c22
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fc0aea3ba173d77bf5e35d75c17e1d368377724dd79bc59ed76d246cf3682b43
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04f4b94f1a44c59895f464174a55ad4567f9ad6cb9a5184c25fed320934f9e0d
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.6211582012293756,
5
  "eval_steps": 386,
6
- "global_step": 480,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3383,6 +3383,41 @@
3383
  "learning_rate": 9.688991567458933e-05,
3384
  "loss": 0.7721,
3385
  "step": 480
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3386
  }
3387
  ],
3388
  "logging_steps": 1,
@@ -3402,7 +3437,7 @@
3402
  "attributes": {}
3403
  }
3404
  },
3405
- "total_flos": 5.3660984241291264e+17,
3406
  "train_batch_size": 4,
3407
  "trial_name": null,
3408
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.6276285991588483,
5
  "eval_steps": 386,
6
+ "global_step": 485,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3383
  "learning_rate": 9.688991567458933e-05,
3384
  "loss": 0.7721,
3385
  "step": 480
3386
+ },
3387
+ {
3388
+ "epoch": 0.6224522808152702,
3389
+ "grad_norm": 0.7499825954437256,
3390
+ "learning_rate": 9.687558608994232e-05,
3391
+ "loss": 0.8728,
3392
+ "step": 481
3393
+ },
3394
+ {
3395
+ "epoch": 0.6237463604011647,
3396
+ "grad_norm": 0.871703028678894,
3397
+ "learning_rate": 9.686122463423732e-05,
3398
+ "loss": 0.88,
3399
+ "step": 482
3400
+ },
3401
+ {
3402
+ "epoch": 0.6250404399870592,
3403
+ "grad_norm": 0.8204110860824585,
3404
+ "learning_rate": 9.684683131723884e-05,
3405
+ "loss": 0.8569,
3406
+ "step": 483
3407
+ },
3408
+ {
3409
+ "epoch": 0.6263345195729537,
3410
+ "grad_norm": 0.8779101371765137,
3411
+ "learning_rate": 9.683240614873294e-05,
3412
+ "loss": 0.901,
3413
+ "step": 484
3414
+ },
3415
+ {
3416
+ "epoch": 0.6276285991588483,
3417
+ "grad_norm": 0.8231368064880371,
3418
+ "learning_rate": 9.681794913852746e-05,
3419
+ "loss": 0.989,
3420
+ "step": 485
3421
  }
3422
  ],
3423
  "logging_steps": 1,
 
3437
  "attributes": {}
3438
  }
3439
  },
3440
+ "total_flos": 5.421995282713805e+17,
3441
  "train_batch_size": 4,
3442
  "trial_name": null,
3443
  "trial_params": null