ncbateman commited on
Commit
5bcd5f6
·
verified ·
1 Parent(s): 8142a09

Training in progress, step 340, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d5a88ed30b979e8b79a0b8e0587def94ba339e1948a5407035566744f99c4d5d
3
  size 97307544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c82180f14717620e0bf756c85b583676dc7d69114368727db88aae103d64aa3
3
  size 97307544
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0fb839e71f2559609681b6b808413e24fd0cd168e4b4c062feff2eeb23eeb240
3
  size 49846644
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d94f089a1e467259428315e6e4ad483b5faaba1f2396f78d9eb6bf56641de7b9
3
  size 49846644
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:07fa940b7237013ec0cf4dbb080091b3cbff44f1b6dbb14ecfef39a6acf258de
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe6a98cf43c9788166599038375eb01cf5493b9863f02eb9463a77c771ad5463
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dee82665dfa8e81a745327347013c931be0b2da410ed681c21a036a30f1549b1
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5097543f48561dead5ef0816caa212645fb361fee6d0137dbeaac39adc3b6ec8
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.43351666127466837,
5
  "eval_steps": 386,
6
- "global_step": 335,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2360,6 +2360,41 @@
2360
  "learning_rate": 9.862570513027735e-05,
2361
  "loss": 0.9637,
2362
  "step": 335
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2363
  }
2364
  ],
2365
  "logging_steps": 1,
@@ -2379,7 +2414,7 @@
2379
  "attributes": {}
2380
  }
2381
  },
2382
- "total_flos": 3.745089525173453e+17,
2383
  "train_batch_size": 4,
2384
  "trial_name": null,
2385
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.43998705920414105,
5
  "eval_steps": 386,
6
+ "global_step": 340,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2360
  "learning_rate": 9.862570513027735e-05,
2361
  "loss": 0.9637,
2362
  "step": 335
2363
+ },
2364
+ {
2365
+ "epoch": 0.4348107408605629,
2366
+ "grad_norm": 0.8372804522514343,
2367
+ "learning_rate": 9.861608885094012e-05,
2368
+ "loss": 0.8609,
2369
+ "step": 336
2370
+ },
2371
+ {
2372
+ "epoch": 0.43610482044645743,
2373
+ "grad_norm": 0.8712325096130371,
2374
+ "learning_rate": 9.860643951716421e-05,
2375
+ "loss": 0.9718,
2376
+ "step": 337
2377
+ },
2378
+ {
2379
+ "epoch": 0.437398900032352,
2380
+ "grad_norm": 0.9869045615196228,
2381
+ "learning_rate": 9.859675713551028e-05,
2382
+ "loss": 0.887,
2383
+ "step": 338
2384
+ },
2385
+ {
2386
+ "epoch": 0.43869297961824655,
2387
+ "grad_norm": 0.9166460037231445,
2388
+ "learning_rate": 9.858704171256145e-05,
2389
+ "loss": 1.0751,
2390
+ "step": 339
2391
+ },
2392
+ {
2393
+ "epoch": 0.43998705920414105,
2394
+ "grad_norm": 1.1965091228485107,
2395
+ "learning_rate": 9.857729325492329e-05,
2396
+ "loss": 1.0093,
2397
+ "step": 340
2398
  }
2399
  ],
2400
  "logging_steps": 1,
 
2414
  "attributes": {}
2415
  }
2416
  },
2417
+ "total_flos": 3.800986383758131e+17,
2418
  "train_batch_size": 4,
2419
  "trial_name": null,
2420
  "trial_params": null