ncbateman commited on
Commit
16efbd8
1 Parent(s): 072ddc3

Training in progress, step 750, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ec824641add39464132135bb7a178865727158172de3bdde8ba2e0a866892f41
3
  size 97307544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e32ecc4999e66597d6819db1b325b5b0ccede14c58906e75e5fbb00465abcf7
3
  size 97307544
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:70e6bf779b6bee53a40e25587a5a42e0d05bf8ba9a3e393cce3a2b2f78901828
3
  size 49846644
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a8732632a99ef4adc44a4fba23fd2bdffd4c8f2489dc2047a47877ca39df5ad
3
  size 49846644
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7f710d7aa27629692705a763863916eca2f9c0f51210ce83e8b983733d085a79
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:643cbcf647e05c0752cc07e6bea7b36366cf0ca8f60a1980d267a0557a0793d4
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:292ac4e1ecba553470239753cee72c97588f1df45bbde70904d7eb64da490cd4
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdd12adaab97c53ac60ce303cf8c26b269527c4e18959058acb54d1c4d122d8e
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9640892914914267,
5
  "eval_steps": 386,
6
- "global_step": 745,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -5238,6 +5238,41 @@
5238
  "learning_rate": 9.201194445904803e-05,
5239
  "loss": 1.0089,
5240
  "step": 745
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5241
  }
5242
  ],
5243
  "logging_steps": 1,
@@ -5257,7 +5292,7 @@
5257
  "attributes": {}
5258
  }
5259
  },
5260
- "total_flos": 8.328631929117082e+17,
5261
  "train_batch_size": 4,
5262
  "trial_name": null,
5263
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9705596894208994,
5
  "eval_steps": 386,
6
+ "global_step": 750,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
5238
  "learning_rate": 9.201194445904803e-05,
5239
  "loss": 1.0089,
5240
  "step": 745
5241
+ },
5242
+ {
5243
+ "epoch": 0.9653833710773213,
5244
+ "grad_norm": 0.747582733631134,
5245
+ "learning_rate": 9.198957554672885e-05,
5246
+ "loss": 0.9392,
5247
+ "step": 746
5248
+ },
5249
+ {
5250
+ "epoch": 0.9666774506632158,
5251
+ "grad_norm": 0.7720314264297485,
5252
+ "learning_rate": 9.196717808538641e-05,
5253
+ "loss": 0.9572,
5254
+ "step": 747
5255
+ },
5256
+ {
5257
+ "epoch": 0.9679715302491103,
5258
+ "grad_norm": 0.802967369556427,
5259
+ "learning_rate": 9.194475209024895e-05,
5260
+ "loss": 0.697,
5261
+ "step": 748
5262
+ },
5263
+ {
5264
+ "epoch": 0.9692656098350049,
5265
+ "grad_norm": 0.7272825837135315,
5266
+ "learning_rate": 9.192229757656406e-05,
5267
+ "loss": 0.7704,
5268
+ "step": 749
5269
+ },
5270
+ {
5271
+ "epoch": 0.9705596894208994,
5272
+ "grad_norm": 0.7247483730316162,
5273
+ "learning_rate": 9.189981455959874e-05,
5274
+ "loss": 0.8788,
5275
+ "step": 750
5276
  }
5277
  ],
5278
  "logging_steps": 1,
 
5292
  "attributes": {}
5293
  }
5294
  },
5295
+ "total_flos": 8.38452878770176e+17,
5296
  "train_batch_size": 4,
5297
  "trial_name": null,
5298
  "trial_params": null