bhuvanmdev commited on
Commit
b49e7dc
·
verified ·
1 Parent(s): 30e5a89

Training in progress, step 600, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:423887fe77f4e0baaa663ac930e3bec81e8cf72266035e2f077c059a4f29b9aa
3
  size 100697728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e4aa0619718301c4cee6a46cec390e99f468dbc3294652a09dad80c2c1c52ac
3
  size 100697728
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2a3a9eef98c4e03dca15f1fb920809d2513c26c4e4d80ec79ee9def992aef167
3
  size 201541754
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f3f0d6d31265332cf0153ce373f616a507a1b2bdb68c61151d6f231efb64720
3
  size 201541754
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c628140ac361731721c9a78828173eec4d00472815f19e4c30739bcc5f9a3440
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a3896f4879562a69571237c21fd91dc0cba3215ed23fb9ae7f0374432fee52f
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a82ddc2c8aea33997924d2c7f637842a8fb10ce7ce6b85d2cc712db7c0b67d47
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a18b7d573288acd75186bfa1799a658370b6b400daf569612aa7a4ba8b0298a1
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.2075134168157424,
5
  "eval_steps": 500,
6
- "global_step": 580,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -471,14 +471,30 @@
471
  "loss": 0.4258,
472
  "num_input_tokens_seen": 386577,
473
  "step": 580
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
474
  }
475
  ],
476
  "logging_steps": 10,
477
  "max_steps": 2795,
478
- "num_input_tokens_seen": 386577,
479
  "num_train_epochs": 1,
480
  "save_steps": 20,
481
- "total_flos": 8692751575074816.0,
482
  "train_batch_size": 1,
483
  "trial_name": null,
484
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.2146690518783542,
5
  "eval_steps": 500,
6
+ "global_step": 600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
471
  "loss": 0.4258,
472
  "num_input_tokens_seen": 386577,
473
  "step": 580
474
+ },
475
+ {
476
+ "epoch": 0.2110912343470483,
477
+ "grad_norm": 0.2969113886356354,
478
+ "learning_rate": 0.00015778175313059035,
479
+ "loss": 0.433,
480
+ "num_input_tokens_seen": 392953,
481
+ "step": 590
482
+ },
483
+ {
484
+ "epoch": 0.2146690518783542,
485
+ "grad_norm": 0.4132014811038971,
486
+ "learning_rate": 0.00015706618962432918,
487
+ "loss": 0.4148,
488
+ "num_input_tokens_seen": 399368,
489
+ "step": 600
490
  }
491
  ],
492
  "logging_steps": 10,
493
  "max_steps": 2795,
494
+ "num_input_tokens_seen": 399368,
495
  "num_train_epochs": 1,
496
  "save_steps": 20,
497
+ "total_flos": 8980375995039744.0,
498
  "train_batch_size": 1,
499
  "trial_name": null,
500
  "trial_params": null