bhuvanmdev committed on
Commit
e27a3f9
·
verified ·
1 Parent(s): 1330e4b

Training in progress, step 560, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:02ca0724696c0714678baa7c919014b6cf1f56a5d8f9a9076b8131ecb4e4820b
3
  size 100697728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7af6ede55d0341241bc32219ef506f81ee7d0a6598b2c326b762b273b49ba7c
3
  size 100697728
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c81ca955f4aa3495f958edfe9940021e3673d3e58c241146187bf4ccb7cc56df
3
  size 201541754
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d712e257f17863cb11322f71673372ff307ede78b55ed17a21b191a03b5a310
3
  size 201541754
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f94817a11f2e5ac87d214de7bcbd2b4175429bcc3bf1d7ea823b2e61e3e96885
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:813c071ac276f83e1b6d20b4afa7012ff50b1e905202a7b0d6045d0bc7d97762
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:857a28d74b36678189295e64a86a5f88d7b3dd82cb001caca49c082b46d9200f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8fda21cee6c433e296b92e0e41eec8879af8f118fc5a2faf949201c0875cc586
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.19320214669051877,
5
  "eval_steps": 500,
6
- "global_step": 540,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -439,14 +439,30 @@
439
  "loss": 0.4314,
440
  "num_input_tokens_seen": 360961,
441
  "step": 540
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
442
  }
443
  ],
444
  "logging_steps": 10,
445
  "max_steps": 2795,
446
- "num_input_tokens_seen": 360961,
447
  "num_train_epochs": 1,
448
  "save_steps": 20,
449
- "total_flos": 8116738195212288.0,
450
  "train_batch_size": 1,
451
  "trial_name": null,
452
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.2003577817531306,
5
  "eval_steps": 500,
6
+ "global_step": 560,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
439
  "loss": 0.4314,
440
  "num_input_tokens_seen": 360961,
441
  "step": 540
442
+ },
443
+ {
444
+ "epoch": 0.1967799642218247,
445
+ "grad_norm": 0.30050089955329895,
446
+ "learning_rate": 0.00016064400715563507,
447
+ "loss": 0.4364,
448
+ "num_input_tokens_seen": 367083,
449
+ "step": 550
450
+ },
451
+ {
452
+ "epoch": 0.2003577817531306,
453
+ "grad_norm": 0.3418981432914734,
454
+ "learning_rate": 0.0001599284436493739,
455
+ "loss": 0.4329,
456
+ "num_input_tokens_seen": 373196,
457
+ "step": 560
458
  }
459
  ],
460
  "logging_steps": 10,
461
  "max_steps": 2795,
462
+ "num_input_tokens_seen": 373196,
463
  "num_train_epochs": 1,
464
  "save_steps": 20,
465
+ "total_flos": 8391860138631168.0,
466
  "train_batch_size": 1,
467
  "trial_name": null,
468
  "trial_params": null