pilotj committed on
Commit
67d85ff
·
verified ·
1 Parent(s): 1815b8e

Training in progress, step 12000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:486af49129a31e1b7c1e43e65eab8e26b8c4cc8ddc136711eef3f671ce00c3ec
3
  size 267906392
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0caaf2194e7c11f747141a29914d2c7311b23ecc790f8f7391591c8cf62b5fdc
3
  size 267906392
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bde14063113d3963bdceb328049937cf5555849d1cc907876d7f93ae43926358
3
  size 535874874
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66b9e15d2567aa28604f83484a2dddda55b8d7a4563d4e9f37ba508020edb3f3
3
  size 535874874
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fbb2ca652b13d24cd9ca0437acca204dd7dc408e95308fc4b43867a99c53a4ed
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:714a88c6f44bf484acbe0664f5841b5c7784526e34bde08b3c5a79c600e45600
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:058930117d0715c41bf93049e22ed1e989ddac193b68a49ee9225dc685ba128a
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c04d7c7aee3cb0d767a512834acc784813a19d6669cbaf00a8719d470988f65d
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.8291246891021729,
3
  "best_model_checkpoint": "/kaggle/working/results/checkpoint-10000",
4
- "epoch": 0.7792410192472532,
5
  "eval_steps": 1000,
6
- "global_step": 10000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -227,6 +227,50 @@
227
  "eval_samples_per_second": 104.017,
228
  "eval_steps_per_second": 0.82,
229
  "step": 10000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
230
  }
231
  ],
232
  "logging_steps": 500,
@@ -246,7 +290,7 @@
246
  "attributes": {}
247
  }
248
  },
249
- "total_flos": 4.240771055616e+16,
250
  "train_batch_size": 32,
251
  "trial_name": null,
252
  "trial_params": null
 
1
  {
2
  "best_metric": 0.8291246891021729,
3
  "best_model_checkpoint": "/kaggle/working/results/checkpoint-10000",
4
+ "epoch": 0.9350892230967038,
5
  "eval_steps": 1000,
6
+ "global_step": 12000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
227
  "eval_samples_per_second": 104.017,
228
  "eval_steps_per_second": 0.82,
229
  "step": 10000
230
+ },
231
+ {
232
+ "epoch": 0.8182030702096158,
233
+ "grad_norm": 2.9698691368103027,
234
+ "learning_rate": 1.8179692979038417e-05,
235
+ "loss": 0.4713,
236
+ "step": 10500
237
+ },
238
+ {
239
+ "epoch": 0.8571651211719785,
240
+ "grad_norm": 6.246368885040283,
241
+ "learning_rate": 1.4283487882802152e-05,
242
+ "loss": 0.4679,
243
+ "step": 11000
244
+ },
245
+ {
246
+ "epoch": 0.8571651211719785,
247
+ "eval_loss": 0.8628306984901428,
248
+ "eval_runtime": 49.8797,
249
+ "eval_samples_per_second": 104.251,
250
+ "eval_steps_per_second": 0.822,
251
+ "step": 11000
252
+ },
253
+ {
254
+ "epoch": 0.8961271721343411,
255
+ "grad_norm": 3.1601552963256836,
256
+ "learning_rate": 1.0387282786565886e-05,
257
+ "loss": 0.4565,
258
+ "step": 11500
259
+ },
260
+ {
261
+ "epoch": 0.9350892230967038,
262
+ "grad_norm": 2.656738758087158,
263
+ "learning_rate": 6.491077690329619e-06,
264
+ "loss": 0.4368,
265
+ "step": 12000
266
+ },
267
+ {
268
+ "epoch": 0.9350892230967038,
269
+ "eval_loss": 0.8968186378479004,
270
+ "eval_runtime": 49.901,
271
+ "eval_samples_per_second": 104.206,
272
+ "eval_steps_per_second": 0.822,
273
+ "step": 12000
274
  }
275
  ],
276
  "logging_steps": 500,
 
290
  "attributes": {}
291
  }
292
  },
293
+ "total_flos": 5.0889252667392e+16,
294
  "train_batch_size": 32,
295
  "trial_name": null,
296
  "trial_params": null