ncbateman committed on
Commit
42fe6a0
·
verified ·
1 Parent(s): 70ca3c0

Training in progress, step 345, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5c82180f14717620e0bf756c85b583676dc7d69114368727db88aae103d64aa3
3
  size 97307544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10742d0d8bf9f86583fe8f8e669e4185a674f714dc49ffdfb5d6b15de14bf150
3
  size 97307544
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d94f089a1e467259428315e6e4ad483b5faaba1f2396f78d9eb6bf56641de7b9
3
  size 49846644
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d2f76262232ba65042a794500f554111ee7dc02d2d4b3bafbd2f6f6ab44d14c
3
  size 49846644
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fe6a98cf43c9788166599038375eb01cf5493b9863f02eb9463a77c771ad5463
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ed11591e8f20b4787f4e6cf3c53a96e282f5ef0a7dd0a280b2e82dae9893e2c
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5097543f48561dead5ef0816caa212645fb361fee6d0137dbeaac39adc3b6ec8
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9bb8b24f4a9f7edc29066576ef36f9a38b3dc95e778ec0d2e8b280ebcd9c22d
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.43998705920414105,
5
  "eval_steps": 386,
6
- "global_step": 340,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2395,6 +2395,41 @@
2395
  "learning_rate": 9.857729325492329e-05,
2396
  "loss": 1.0093,
2397
  "step": 340
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2398
  }
2399
  ],
2400
  "logging_steps": 1,
@@ -2414,7 +2449,7 @@
2414
  "attributes": {}
2415
  }
2416
  },
2417
- "total_flos": 3.800986383758131e+17,
2418
  "train_batch_size": 4,
2419
  "trial_name": null,
2420
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.44645745713361373,
5
  "eval_steps": 386,
6
+ "global_step": 345,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2395
  "learning_rate": 9.857729325492329e-05,
2396
  "loss": 1.0093,
2397
  "step": 340
2398
+ },
2399
+ {
2400
+ "epoch": 0.4412811387900356,
2401
+ "grad_norm": 0.8646867275238037,
2402
+ "learning_rate": 9.856751176922388e-05,
2403
+ "loss": 0.9235,
2404
+ "step": 341
2405
+ },
2406
+ {
2407
+ "epoch": 0.4425752183759301,
2408
+ "grad_norm": 0.7576479315757751,
2409
+ "learning_rate": 9.85576972621137e-05,
2410
+ "loss": 0.8323,
2411
+ "step": 342
2412
+ },
2413
+ {
2414
+ "epoch": 0.44386929796182467,
2415
+ "grad_norm": 0.8257366418838501,
2416
+ "learning_rate": 9.854784974026572e-05,
2417
+ "loss": 0.8478,
2418
+ "step": 343
2419
+ },
2420
+ {
2421
+ "epoch": 0.4451633775477192,
2422
+ "grad_norm": 0.8963577747344971,
2423
+ "learning_rate": 9.853796921037534e-05,
2424
+ "loss": 0.8943,
2425
+ "step": 344
2426
+ },
2427
+ {
2428
+ "epoch": 0.44645745713361373,
2429
+ "grad_norm": 0.8696343898773193,
2430
+ "learning_rate": 9.85280556791604e-05,
2431
+ "loss": 0.991,
2432
+ "step": 345
2433
  }
2434
  ],
2435
  "logging_steps": 1,
 
2449
  "attributes": {}
2450
  }
2451
  },
2452
+ "total_flos": 3.8568832423428096e+17,
2453
  "train_batch_size": 4,
2454
  "trial_name": null,
2455
  "trial_params": null