bhuvanmdev commited on
Commit
acfc52d
·
verified ·
1 Parent(s): 72d7b74

Training in progress, step 640, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c42e3144125416adcb9a1ec0d9b0eb55619b6a3619ed6542a06de661bbf161b2
3
  size 100697728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e59a37c02b250fde83ba038c839a2e952bf520ce910f43a7857db234f396d3f0
3
  size 100697728
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9177ecdd03afda4d13d5d2eb2a0c1d5fbe522c00f0127e7917ebf328b282a0c8
3
  size 201541754
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79a9df9fefae970af178ffc5c6daff5bac43c4ffccc8bb655e4c8b0717bde90a
3
  size 201541754
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:57a916e4fd36223dffeadbeb32e21c87fb935df188a4e2e19aafc7b1c3d84241
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e72fb99776019a0f4753f71a958f1e0ab8cd89837117e79e8970f4ea20b12a6d
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:398bdc23ac4d9e39643bb660fbaeca4b591289face79e5178809ed45f99c413b
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dbd83824b10c800d2a1e10e6af2da6cf8778074505a180484fec6f86647c2253
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.22182468694096602,
5
  "eval_steps": 500,
6
- "global_step": 620,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -503,14 +503,30 @@
503
  "loss": 0.4495,
504
  "num_input_tokens_seen": 413867,
505
  "step": 620
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
506
  }
507
  ],
508
  "logging_steps": 10,
509
  "max_steps": 2795,
510
- "num_input_tokens_seen": 413867,
511
  "num_train_epochs": 1,
512
  "save_steps": 20,
513
- "total_flos": 9306407303387136.0,
514
  "train_batch_size": 1,
515
  "trial_name": null,
516
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.22898032200357782,
5
  "eval_steps": 500,
6
+ "global_step": 640,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
503
  "loss": 0.4495,
504
  "num_input_tokens_seen": 413867,
505
  "step": 620
506
+ },
507
+ {
508
+ "epoch": 0.22540250447227192,
509
+ "grad_norm": 0.41710999608039856,
510
+ "learning_rate": 0.00015491949910554563,
511
+ "loss": 0.426,
512
+ "num_input_tokens_seen": 422712,
513
+ "step": 630
514
+ },
515
+ {
516
+ "epoch": 0.22898032200357782,
517
+ "grad_norm": 0.42847341299057007,
518
+ "learning_rate": 0.00015420393559928446,
519
+ "loss": 0.4163,
520
+ "num_input_tokens_seen": 428523,
521
+ "step": 640
522
  }
523
  ],
524
  "logging_steps": 10,
525
  "max_steps": 2795,
526
+ "num_input_tokens_seen": 428523,
527
  "num_train_epochs": 1,
528
  "save_steps": 20,
529
+ "total_flos": 9635968987305984.0,
530
  "train_batch_size": 1,
531
  "trial_name": null,
532
  "trial_params": null