bhuvanmdev commited on
Commit
755c4ed
1 Parent(s): d7ce047

Training in progress, step 580, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a7af6ede55d0341241bc32219ef506f81ee7d0a6598b2c326b762b273b49ba7c
3
  size 100697728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:423887fe77f4e0baaa663ac930e3bec81e8cf72266035e2f077c059a4f29b9aa
3
  size 100697728
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3d712e257f17863cb11322f71673372ff307ede78b55ed17a21b191a03b5a310
3
  size 201541754
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a3a9eef98c4e03dca15f1fb920809d2513c26c4e4d80ec79ee9def992aef167
3
  size 201541754
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:813c071ac276f83e1b6d20b4afa7012ff50b1e905202a7b0d6045d0bc7d97762
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c628140ac361731721c9a78828173eec4d00472815f19e4c30739bcc5f9a3440
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8fda21cee6c433e296b92e0e41eec8879af8f118fc5a2faf949201c0875cc586
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a82ddc2c8aea33997924d2c7f637842a8fb10ce7ce6b85d2cc712db7c0b67d47
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.2003577817531306,
5
  "eval_steps": 500,
6
- "global_step": 560,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -455,14 +455,30 @@
455
  "loss": 0.4329,
456
  "num_input_tokens_seen": 373196,
457
  "step": 560
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
458
  }
459
  ],
460
  "logging_steps": 10,
461
  "max_steps": 2795,
462
- "num_input_tokens_seen": 373196,
463
  "num_train_epochs": 1,
464
  "save_steps": 20,
465
- "total_flos": 8391860138631168.0,
466
  "train_batch_size": 1,
467
  "trial_name": null,
468
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.2075134168157424,
5
  "eval_steps": 500,
6
+ "global_step": 580,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
455
  "loss": 0.4329,
456
  "num_input_tokens_seen": 373196,
457
  "step": 560
458
+ },
459
+ {
460
+ "epoch": 0.2039355992844365,
461
+ "grad_norm": 0.36333030462265015,
462
+ "learning_rate": 0.0001592128801431127,
463
+ "loss": 0.4482,
464
+ "num_input_tokens_seen": 380449,
465
+ "step": 570
466
+ },
467
+ {
468
+ "epoch": 0.2075134168157424,
469
+ "grad_norm": 0.2979726195335388,
470
+ "learning_rate": 0.00015849731663685151,
471
+ "loss": 0.4258,
472
+ "num_input_tokens_seen": 386577,
473
+ "step": 580
474
  }
475
  ],
476
  "logging_steps": 10,
477
  "max_steps": 2795,
478
+ "num_input_tokens_seen": 386577,
479
  "num_train_epochs": 1,
480
  "save_steps": 20,
481
+ "total_flos": 8692751575074816.0,
482
  "train_batch_size": 1,
483
  "trial_name": null,
484
  "trial_params": null