mattbonnell commited on
Commit
0bb1c14
·
verified ·
1 Parent(s): 392591d

Training in progress, step 16000, checkpoint

Browse files
last-checkpoint/global_step16000/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74a25234c1f780e15e83309158ecb07c49c8c3351f6b3d5ebfbdf10c3d34478a
3
+ size 197282509
last-checkpoint/global_step16000/zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b74f02b0b5a6b6d5843ca46bc53062ab759eed1584bc29a69cffa6a1736ec15
3
+ size 180416968
last-checkpoint/global_step16000/zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ff92b38172cd635a23c777b6b6eefb0da4d30754fdf34e7d0630ddca213f14e
3
+ size 180416776
last-checkpoint/global_step16000/zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11febf62c9f73266c25236ab50dfab41409f97bdb56adbf4d4edf021564aa534
3
+ size 180416776
last-checkpoint/global_step16000/zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05e31ccf4cfa0c45c8d9396cff77752649a49b0f35441d5d054bece6e2267085
3
+ size 180416904
last-checkpoint/global_step16000/zero_pp_rank_4_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b1074e1d46fa74656185ee0b7b31dcbb97450cf0590305861d259d6c3219803
3
+ size 180416712
last-checkpoint/global_step16000/zero_pp_rank_5_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8cd2bae6b2bd55970d2a0ec1ce3814d6bc32e58d931c3bb34290e3f28f220dab
3
+ size 180417096
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step15500
 
1
+ global_step16000
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cbbfba19cf8c4868fcb591e428153e0dfab01e0000556ed74857ebf4d4d0b09c
3
  size 188836816
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86cdbcf702f36b103531085178dbb194b5ed24a617c693786569c5f06a28a997
3
  size 188836816
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e532e7f78ce215481f83407b0560a37e2979f8d3b5916593aae7cfa436c82ca8
3
  size 15536
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1762a051d2ef63782191c6ddb670046fd603a79f169ade2a20e3ec7968d73ae2
3
  size 15536
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:54deac076126a8a5ab13f060f479d26e551e362b17517f4b1320311a9393ea91
3
  size 15536
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:528307c815a7fc3ae0e940360ffd3bbc2afb3e407b6c50c9bc322b05f89b5a94
3
  size 15536
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e1caa5002a3d632d2580f3767aeb17e7dcb9094ef0f0cec447e0bbe64dba963f
3
  size 15536
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94b4c77667b44a108f2c4524ecb78b00f15dea830fc559e3dd27f09695096d4e
3
  size 15536
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:df07a8cc413b3fabbbcb751fedf4cd462f781f2168fcd682f0e751cf69c3eb1b
3
  size 15536
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2af1fcdccef02a571531525d33c8c2ab59e4a0d3036f87ff6b31fa9cbedb4da
3
  size 15536
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e047643b78223ae0aa07482134e20fe15812cc1e4c689a1b49ea591c7cdc5750
3
  size 15536
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89c6761be70409b46c72197709a9276709a6e1e13c93978aa3d44a91c5dd5845
3
  size 15536
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b303e24ad7fee581c0e7fb49dd8e1426edcd88dd63fef654033f2a15ec87a19a
3
- size 15472
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ecd7e3c5995ac03f69428fef9fdfff61483fb597ae67d40f95f01eb5737db5e4
3
+ size 15536
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2272465e853654bd253a9da64924193b698e19218c7dbd82f58e52260bc06930
3
  size 1256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90507ecd8d24438d3d06f3d874381f825e91ab823f24d3428100127026ea819d
3
  size 1256
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 418.9189189189189,
5
  "eval_steps": 1500,
6
- "global_step": 15500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2258,6 +2258,76 @@
2258
  "learning_rate": 0.0001,
2259
  "loss": 0.0129,
2260
  "step": 15500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2261
  }
2262
  ],
2263
  "logging_steps": 50,
@@ -2277,7 +2347,7 @@
2277
  "attributes": {}
2278
  }
2279
  },
2280
- "total_flos": 4.351020525227858e+20,
2281
  "train_batch_size": 64,
2282
  "trial_name": null,
2283
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 432.43243243243245,
5
  "eval_steps": 1500,
6
+ "global_step": 16000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2258
  "learning_rate": 0.0001,
2259
  "loss": 0.0129,
2260
  "step": 15500
2261
+ },
2262
+ {
2263
+ "epoch": 420.27027027027026,
2264
+ "grad_norm": 0.35932862758636475,
2265
+ "learning_rate": 0.0001,
2266
+ "loss": 0.0133,
2267
+ "step": 15550
2268
+ },
2269
+ {
2270
+ "epoch": 421.6216216216216,
2271
+ "grad_norm": 0.20093189179897308,
2272
+ "learning_rate": 0.0001,
2273
+ "loss": 0.0126,
2274
+ "step": 15600
2275
+ },
2276
+ {
2277
+ "epoch": 422.97297297297297,
2278
+ "grad_norm": 0.32909420132637024,
2279
+ "learning_rate": 0.0001,
2280
+ "loss": 0.0128,
2281
+ "step": 15650
2282
+ },
2283
+ {
2284
+ "epoch": 424.3243243243243,
2285
+ "grad_norm": 0.28278329968452454,
2286
+ "learning_rate": 0.0001,
2287
+ "loss": 0.0117,
2288
+ "step": 15700
2289
+ },
2290
+ {
2291
+ "epoch": 425.6756756756757,
2292
+ "grad_norm": 0.1597350388765335,
2293
+ "learning_rate": 0.0001,
2294
+ "loss": 0.0119,
2295
+ "step": 15750
2296
+ },
2297
+ {
2298
+ "epoch": 427.02702702702703,
2299
+ "grad_norm": 0.20241086184978485,
2300
+ "learning_rate": 0.0001,
2301
+ "loss": 0.0114,
2302
+ "step": 15800
2303
+ },
2304
+ {
2305
+ "epoch": 428.3783783783784,
2306
+ "grad_norm": 0.24632301926612854,
2307
+ "learning_rate": 0.0001,
2308
+ "loss": 0.0115,
2309
+ "step": 15850
2310
+ },
2311
+ {
2312
+ "epoch": 429.72972972972974,
2313
+ "grad_norm": 0.36104726791381836,
2314
+ "learning_rate": 0.0001,
2315
+ "loss": 0.0114,
2316
+ "step": 15900
2317
+ },
2318
+ {
2319
+ "epoch": 431.0810810810811,
2320
+ "grad_norm": 0.23273630440235138,
2321
+ "learning_rate": 0.0001,
2322
+ "loss": 0.0115,
2323
+ "step": 15950
2324
+ },
2325
+ {
2326
+ "epoch": 432.43243243243245,
2327
+ "grad_norm": 0.2528134882450104,
2328
+ "learning_rate": 0.0001,
2329
+ "loss": 0.0113,
2330
+ "step": 16000
2331
  }
2332
  ],
2333
  "logging_steps": 50,
 
2347
  "attributes": {}
2348
  }
2349
  },
2350
+ "total_flos": 4.491310429887309e+20,
2351
  "train_batch_size": 64,
2352
  "trial_name": null,
2353
  "trial_params": null