ncbateman committed on
Commit
9d8baa3
1 Parent(s): fd485a7

Training in progress, step 25, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:edb455920aa95cf9ada95ab94d78e0b6940976a5262e1f27d468e9d2ce187ddc
3
  size 45118424
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9e99e59bad3936a53b55c549edee9c54e5ef217806171186930b2376024b82a
3
  size 45118424
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f479c4315b5ddbf149138fc6ec05fc8a86a44a6cbc038a90a7aefa3ebbfc7fb3
3
  size 23159290
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac8286fb2b82ebe608c12d6303458dca5fc5c8311d32ecaa4907f21dd6efdd32
3
  size 23159290
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bd424b40558ff64379aeb46692c348d6b618b41d4236dc9fd2e104bf0cbad808
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc5bcd07c7980be2bcc780ec62347040974b973719b0561b7d2425edc8426ebd
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e916177211118fc8a152e53676009788e79374ee54f188f77aebc8c03e2bf7b9
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:814b79b47e12bde76b22a1ac4fd2f1e7ddf84c332d0b19c3538a1fad1c6cc96e
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.10309278350515463,
5
  "eval_steps": 97,
6
- "global_step": 20,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -155,6 +155,41 @@
155
  "learning_rate": 4e-05,
156
  "loss": 1.1069,
157
  "step": 20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
158
  }
159
  ],
160
  "logging_steps": 1,
@@ -174,7 +209,7 @@
174
  "attributes": {}
175
  }
176
  },
177
- "total_flos": 7741780556513280.0,
178
  "train_batch_size": 4,
179
  "trial_name": null,
180
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.12886597938144329,
5
  "eval_steps": 97,
6
+ "global_step": 25,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
155
  "learning_rate": 4e-05,
156
  "loss": 1.1069,
157
  "step": 20
158
+ },
159
+ {
160
+ "epoch": 0.10824742268041238,
161
+ "grad_norm": 0.3861861526966095,
162
+ "learning_rate": 4.2e-05,
163
+ "loss": 0.864,
164
+ "step": 21
165
+ },
166
+ {
167
+ "epoch": 0.1134020618556701,
168
+ "grad_norm": 0.5285366773605347,
169
+ "learning_rate": 4.4000000000000006e-05,
170
+ "loss": 0.8627,
171
+ "step": 22
172
+ },
173
+ {
174
+ "epoch": 0.11855670103092783,
175
+ "grad_norm": 0.39096203446388245,
176
+ "learning_rate": 4.600000000000001e-05,
177
+ "loss": 0.8984,
178
+ "step": 23
179
+ },
180
+ {
181
+ "epoch": 0.12371134020618557,
182
+ "grad_norm": 0.39784133434295654,
183
+ "learning_rate": 4.8e-05,
184
+ "loss": 0.9631,
185
+ "step": 24
186
+ },
187
+ {
188
+ "epoch": 0.12886597938144329,
189
+ "grad_norm": 0.37275853753089905,
190
+ "learning_rate": 5e-05,
191
+ "loss": 0.831,
192
+ "step": 25
193
  }
194
  ],
195
  "logging_steps": 1,
 
209
  "attributes": {}
210
  }
211
  },
212
+ "total_flos": 9677225695641600.0,
213
  "train_batch_size": 4,
214
  "trial_name": null,
215
  "trial_params": null