ncbateman commited on
Commit
5795aaa
1 Parent(s): 0100680

Training in progress, step 650, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d08b6d9b3a9d15d25aadb2cb24debe2d7cdca1e55bd48ea3702a43a94a2581af
3
  size 97307544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3668fa34724afb7451663beb5c2888ddcb2b7ecddb13ed07d944025c27d8d3b
3
  size 97307544
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b95e8afeb39ff0d445ff047308657bd1c41dd23bbe6bc9ee6297f75a79be822b
3
  size 49846644
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03eb30e093c78829d55e5b35712320efbf2416ee2737745a276186c084bbdecb
3
  size 49846644
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f4171439eadb13b2843ff5c47858b0150e7075e96e555aee2539438f15e25168
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f89037ac2abeb6171c1ce95a77a94c808e277b9f70abf8b11f093a79735219d
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ea258fcfaa803fc3574884ee61cef84bd44233621e9e6a5cc1b7b02d59d4cca3
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f40d91018758ada40c1cb9514ff9b9e8ed129fd32f51a6eaa9ac898e033c2d44
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.8346813329019734,
5
  "eval_steps": 386,
6
- "global_step": 645,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4538,6 +4538,41 @@
4538
  "learning_rate": 9.410213514796564e-05,
4539
  "loss": 0.8242,
4540
  "step": 645
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4541
  }
4542
  ],
4543
  "logging_steps": 1,
@@ -4557,7 +4592,7 @@
4557
  "attributes": {}
4558
  }
4559
  },
4560
- "total_flos": 7.210694757423514e+17,
4561
  "train_batch_size": 4,
4562
  "trial_name": null,
4563
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.8411517308314461,
5
  "eval_steps": 386,
6
+ "global_step": 650,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4538
  "learning_rate": 9.410213514796564e-05,
4539
  "loss": 0.8242,
4540
  "step": 645
4541
+ },
4542
+ {
4543
+ "epoch": 0.835975412487868,
4544
+ "grad_norm": 0.8321415185928345,
4545
+ "learning_rate": 9.408269466472864e-05,
4546
+ "loss": 0.9094,
4547
+ "step": 646
4548
+ },
4549
+ {
4550
+ "epoch": 0.8372694920737626,
4551
+ "grad_norm": 0.7805325984954834,
4552
+ "learning_rate": 9.406322420934123e-05,
4553
+ "loss": 0.837,
4554
+ "step": 647
4555
+ },
4556
+ {
4557
+ "epoch": 0.8385635716596571,
4558
+ "grad_norm": 0.8147938251495361,
4559
+ "learning_rate": 9.404372379504151e-05,
4560
+ "loss": 0.9177,
4561
+ "step": 648
4562
+ },
4563
+ {
4564
+ "epoch": 0.8398576512455516,
4565
+ "grad_norm": 0.7518514394760132,
4566
+ "learning_rate": 9.402419343508797e-05,
4567
+ "loss": 0.7562,
4568
+ "step": 649
4569
+ },
4570
+ {
4571
+ "epoch": 0.8411517308314461,
4572
+ "grad_norm": 0.7858056426048279,
4573
+ "learning_rate": 9.400463314275943e-05,
4574
+ "loss": 0.9865,
4575
+ "step": 650
4576
  }
4577
  ],
4578
  "logging_steps": 1,
 
4592
  "attributes": {}
4593
  }
4594
  },
4595
+ "total_flos": 7.266591616008192e+17,
4596
  "train_batch_size": 4,
4597
  "trial_name": null,
4598
  "trial_params": null