ncbateman commited on
Commit
ced3bbc
·
verified ·
1 Parent(s): c8ee262

Training in progress, step 700, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b2818821d2bd85c742350df6f93ddc6ceaa5a28fbd9c64e00f2455a5ea94572d
3
  size 97307544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1aabb06c1fe37ff25379e382b1f17082b07d34182a4fc2da6ffae8579ac4675e
3
  size 97307544
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e3c3d6d6a41f72e4e2daea557637ea5b684d9fb7a2db23f4008a8c201310880c
3
  size 49846644
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d75409d4f4ef99a18e288d75b5c709cef3c171c10c32cf11b3494ebb2d324c6
3
  size 49846644
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c791046602634dc7125db7c8a6fa9643238ca36be61916506dd41a5029b29c2a
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db4a5aa1d65732ea6e6ad6ffbd33a4afe19476644f2bd043f99022469dab6bc0
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:743b3e06741f3e617852c286974506bffbbfec118a8156be26ba7327d67612e1
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e88a0b12b39fadaa49a1f55d69192330694c5d8626f92166735ca7ee1b34dd9e
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.8993853121967001,
5
  "eval_steps": 386,
6
- "global_step": 695,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4888,6 +4888,41 @@
4888
  "learning_rate": 9.309365924047853e-05,
4889
  "loss": 0.9606,
4890
  "step": 695
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4891
  }
4892
  ],
4893
  "logging_steps": 1,
@@ -4907,7 +4942,7 @@
4907
  "attributes": {}
4908
  }
4909
  },
4910
- "total_flos": 7.769663343270298e+17,
4911
  "train_batch_size": 4,
4912
  "trial_name": null,
4913
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9058557101261727,
5
  "eval_steps": 386,
6
+ "global_step": 700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4888
  "learning_rate": 9.309365924047853e-05,
4889
  "loss": 0.9606,
4890
  "step": 695
4891
+ },
4892
+ {
4893
+ "epoch": 0.9006793917825946,
4894
+ "grad_norm": 0.7008129954338074,
4895
+ "learning_rate": 9.307273676353432e-05,
4896
+ "loss": 0.8531,
4897
+ "step": 696
4898
+ },
4899
+ {
4900
+ "epoch": 0.9019734713684892,
4901
+ "grad_norm": 0.8026110529899597,
4902
+ "learning_rate": 9.305178500111755e-05,
4903
+ "loss": 0.7784,
4904
+ "step": 697
4905
+ },
4906
+ {
4907
+ "epoch": 0.9032675509543837,
4908
+ "grad_norm": 0.7309970855712891,
4909
+ "learning_rate": 9.30308039674735e-05,
4910
+ "loss": 0.9284,
4911
+ "step": 698
4912
+ },
4913
+ {
4914
+ "epoch": 0.9045616305402783,
4915
+ "grad_norm": 0.801511824131012,
4916
+ "learning_rate": 9.300979367686729e-05,
4917
+ "loss": 0.8111,
4918
+ "step": 699
4919
+ },
4920
+ {
4921
+ "epoch": 0.9058557101261727,
4922
+ "grad_norm": 0.8487135767936707,
4923
+ "learning_rate": 9.298875414358399e-05,
4924
+ "loss": 0.9095,
4925
+ "step": 700
4926
  }
4927
  ],
4928
  "logging_steps": 1,
 
4942
  "attributes": {}
4943
  }
4944
  },
4945
+ "total_flos": 7.825560201854976e+17,
4946
  "train_batch_size": 4,
4947
  "trial_name": null,
4948
  "trial_params": null