bhuvanmdev committed on
Commit
00890cd
·
verified ·
1 Parent(s): 50a43cb

Training in progress, step 520, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ec4ab9a57a93c3513ea05caae28c984ac3710ebaf13f368939d8bad1fd992ae0
3
  size 100697728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bcedf5ba185dfed8a8532b4caf834fa25d3dcefcb40737f26670abacdb4ac535
3
  size 100697728
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:97d9a87ee6b6052046f0b14d04eaaf0357892b7a7f4727b789c67069b7547c6a
3
  size 201541754
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:759c1758be952783d5bfbf82779b75426f877cd751c0c1e49f49db2f8f652cf7
3
  size 201541754
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:87fe9fb3b31ca0189979c0daa73883f6e8cf029b274919616b419e017ad599ca
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49367cdd574f043b0528d491fffcb3439b253a28e3d60840b470ada16f51e852
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:511871b8163e6c2442a4d43033ca6a9432c171a817a07d44c57891428e700525
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c41970a14a61e2b623506dfd598a59afb05006b7fbb269fb8f9aac0b5a12d27
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.17889087656529518,
5
  "eval_steps": 500,
6
- "global_step": 500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -407,14 +407,30 @@
407
  "loss": 0.4133,
408
  "num_input_tokens_seen": 332100,
409
  "step": 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
410
  }
411
  ],
412
  "logging_steps": 10,
413
  "max_steps": 2795,
414
- "num_input_tokens_seen": 332100,
415
  "num_train_epochs": 1,
416
  "save_steps": 20,
417
- "total_flos": 7467756224716800.0,
418
  "train_batch_size": 1,
419
  "trial_name": null,
420
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.18604651162790697,
5
  "eval_steps": 500,
6
+ "global_step": 520,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
407
  "loss": 0.4133,
408
  "num_input_tokens_seen": 332100,
409
  "step": 500
410
+ },
411
+ {
412
+ "epoch": 0.18246869409660108,
413
+ "grad_norm": 0.39435234665870667,
414
+ "learning_rate": 0.0001635062611806798,
415
+ "loss": 0.4428,
416
+ "num_input_tokens_seen": 337848,
417
+ "step": 510
418
+ },
419
+ {
420
+ "epoch": 0.18604651162790697,
421
+ "grad_norm": 0.2763209640979767,
422
+ "learning_rate": 0.00016279069767441862,
423
+ "loss": 0.4464,
424
+ "num_input_tokens_seen": 345759,
425
+ "step": 520
426
  }
427
  ],
428
  "logging_steps": 10,
429
  "max_steps": 2795,
430
+ "num_input_tokens_seen": 345759,
431
  "num_train_epochs": 1,
432
  "save_steps": 20,
433
+ "total_flos": 7774898899433472.0,
434
  "train_batch_size": 1,
435
  "trial_name": null,
436
  "trial_params": null