ncbateman commited on
Commit
5a3d136
·
verified ·
1 Parent(s): 441daef

Training in progress, step 645, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f8721ee43349bfc7e39016efefeb84f0798023653ef25fbfec79493feb5cad8e
3
  size 97307544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d08b6d9b3a9d15d25aadb2cb24debe2d7cdca1e55bd48ea3702a43a94a2581af
3
  size 97307544
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c0b13d732b2a8dad952ee020e87f52fea1e5a59d34717eed4f8c6474e09d2124
3
  size 49846644
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b95e8afeb39ff0d445ff047308657bd1c41dd23bbe6bc9ee6297f75a79be822b
3
  size 49846644
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3037bb99ad816203fc49047110ff2f6ec00b478885f7281330c502f80f4d07f6
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4171439eadb13b2843ff5c47858b0150e7075e96e555aee2539438f15e25168
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dc7648e34cd3e7e1b0e3d71e0e1fe805f3949228899d535acad963d0ca07c4a6
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea258fcfaa803fc3574884ee61cef84bd44233621e9e6a5cc1b7b02d59d4cca3
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.8282109349725008,
5
  "eval_steps": 386,
6
- "global_step": 640,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4503,6 +4503,41 @@
4503
  "learning_rate": 9.419888751998767e-05,
4504
  "loss": 0.7984,
4505
  "step": 640
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4506
  }
4507
  ],
4508
  "logging_steps": 1,
@@ -4522,7 +4557,7 @@
4522
  "attributes": {}
4523
  }
4524
  },
4525
- "total_flos": 7.154797898838835e+17,
4526
  "train_batch_size": 4,
4527
  "trial_name": null,
4528
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.8346813329019734,
5
  "eval_steps": 386,
6
+ "global_step": 645,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4503
  "learning_rate": 9.419888751998767e-05,
4504
  "loss": 0.7984,
4505
  "step": 640
4506
+ },
4507
+ {
4508
+ "epoch": 0.8295050145583953,
4509
+ "grad_norm": 0.8296061754226685,
4510
+ "learning_rate": 9.417959709536078e-05,
4511
+ "loss": 0.8597,
4512
+ "step": 641
4513
+ },
4514
+ {
4515
+ "epoch": 0.8307990941442899,
4516
+ "grad_norm": 0.8495222330093384,
4517
+ "learning_rate": 9.416027663269881e-05,
4518
+ "loss": 0.8396,
4519
+ "step": 642
4520
+ },
4521
+ {
4522
+ "epoch": 0.8320931737301844,
4523
+ "grad_norm": 0.8501962423324585,
4524
+ "learning_rate": 9.414092614513787e-05,
4525
+ "loss": 0.9911,
4526
+ "step": 643
4527
+ },
4528
+ {
4529
+ "epoch": 0.833387253316079,
4530
+ "grad_norm": 0.7546764612197876,
4531
+ "learning_rate": 9.412154564583448e-05,
4532
+ "loss": 0.9043,
4533
+ "step": 644
4534
+ },
4535
+ {
4536
+ "epoch": 0.8346813329019734,
4537
+ "grad_norm": 0.8009942770004272,
4538
+ "learning_rate": 9.410213514796564e-05,
4539
+ "loss": 0.8242,
4540
+ "step": 645
4541
  }
4542
  ],
4543
  "logging_steps": 1,
 
4557
  "attributes": {}
4558
  }
4559
  },
4560
+ "total_flos": 7.210694757423514e+17,
4561
  "train_batch_size": 4,
4562
  "trial_name": null,
4563
  "trial_params": null