bhuvanmdev committed on
Commit
3fddea4
·
verified ·
1 Parent(s): fb5221d

Training in progress, step 500, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1149ac7d2e20a3329470bc1ff37dc81499b3a2cb0df815434dcf8dae04d7c3e3
3
  size 100697728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec4ab9a57a93c3513ea05caae28c984ac3710ebaf13f368939d8bad1fd992ae0
3
  size 100697728
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fdd9e444faaff4d97315caba29dfc8c642a3e55aa3d110711caa1e01bf50f4b7
3
  size 201541754
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97d9a87ee6b6052046f0b14d04eaaf0357892b7a7f4727b789c67069b7547c6a
3
  size 201541754
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ffef31896db6975b47a1bdf431486b1a47c0e23e76fe29d6042f106a12e8af6b
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87fe9fb3b31ca0189979c0daa73883f6e8cf029b274919616b419e017ad599ca
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:84691b1971218726e113e2882d1da3f46cde022409a500bc85e18521371a8edd
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:511871b8163e6c2442a4d43033ca6a9432c171a817a07d44c57891428e700525
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.17173524150268335,
5
  "eval_steps": 500,
6
- "global_step": 480,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -391,14 +391,30 @@
391
  "loss": 0.4089,
392
  "num_input_tokens_seen": 319432,
393
  "step": 480
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
394
  }
395
  ],
396
  "logging_steps": 10,
397
  "max_steps": 2795,
398
- "num_input_tokens_seen": 319432,
399
  "num_train_epochs": 1,
400
  "save_steps": 20,
401
- "total_flos": 7182897640390656.0,
402
  "train_batch_size": 1,
403
  "trial_name": null,
404
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.17889087656529518,
5
  "eval_steps": 500,
6
+ "global_step": 500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
391
  "loss": 0.4089,
392
  "num_input_tokens_seen": 319432,
393
  "step": 480
394
+ },
395
+ {
396
+ "epoch": 0.17531305903398928,
397
+ "grad_norm": 0.31139102578163147,
398
+ "learning_rate": 0.00016493738819320215,
399
+ "loss": 0.4244,
400
+ "num_input_tokens_seen": 325211,
401
+ "step": 490
402
+ },
403
+ {
404
+ "epoch": 0.17889087656529518,
405
+ "grad_norm": 0.34276363253593445,
406
+ "learning_rate": 0.00016422182468694098,
407
+ "loss": 0.4133,
408
+ "num_input_tokens_seen": 332100,
409
+ "step": 500
410
  }
411
  ],
412
  "logging_steps": 10,
413
  "max_steps": 2795,
414
+ "num_input_tokens_seen": 332100,
415
  "num_train_epochs": 1,
416
  "save_steps": 20,
417
+ "total_flos": 7467756224716800.0,
418
  "train_batch_size": 1,
419
  "trial_name": null,
420
  "trial_params": null