ncbateman committed on
Commit
7afec63
·
verified ·
1 Parent(s): d1189b0

Training in progress, step 375, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3feb049f84caa6e45dce230afc9473e26f98520e4afa70a37072f4ef66b84c97
3
  size 97307544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:661491863db880d889462331cf28f24df2f6df70492c034b40f718f3f65523bb
3
  size 97307544
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f0d3ec45aa2db7e380044bf42b2b85a321f290d5cd471c615808e98ec9617ea
3
  size 49846644
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32a154b794f3a0235100d8e5790fb4656d040d6aabf01fddc63b24d541ce7141
3
  size 49846644
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:adca6753b49430d8d85732dd961bf4e168c9f525f80ba2505ec99cb6254ea83c
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0002e057fedb4f60b1b185f2a74688f5fb1a0b57ce953ac999f33226551711f0
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d671199ea99b2115775ea1b1182417de6788e71b7fb0dc4b7b1ebf77612cd032
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:035a0df081c185ccdc3573767b609ca409aa3c3d9e7594dbbc7218f373a1074d
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.47880944678097703,
5
  "eval_steps": 386,
6
- "global_step": 370,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2605,6 +2605,41 @@
2605
  "learning_rate": 9.826951207696258e-05,
2606
  "loss": 0.8781,
2607
  "step": 370
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2608
  }
2609
  ],
2610
  "logging_steps": 1,
@@ -2624,7 +2659,7 @@
2624
  "attributes": {}
2625
  }
2626
  },
2627
- "total_flos": 4.1363675352662016e+17,
2628
  "train_batch_size": 4,
2629
  "trial_name": null,
2630
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.4852798447104497,
5
  "eval_steps": 386,
6
+ "global_step": 375,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2605
  "learning_rate": 9.826951207696258e-05,
2606
  "loss": 0.8781,
2607
  "step": 370
2608
+ },
2609
+ {
2610
+ "epoch": 0.48010352636687154,
2611
+ "grad_norm": 0.959077775478363,
2612
+ "learning_rate": 9.825874294268396e-05,
2613
+ "loss": 0.9448,
2614
+ "step": 371
2615
+ },
2616
+ {
2617
+ "epoch": 0.4813976059527661,
2618
+ "grad_norm": 0.7479959726333618,
2619
+ "learning_rate": 9.824794099692878e-05,
2620
+ "loss": 0.8608,
2621
+ "step": 372
2622
+ },
2623
+ {
2624
+ "epoch": 0.48269168553866065,
2625
+ "grad_norm": 0.8064723014831543,
2626
+ "learning_rate": 9.823710624704137e-05,
2627
+ "loss": 1.1222,
2628
+ "step": 373
2629
+ },
2630
+ {
2631
+ "epoch": 0.48398576512455516,
2632
+ "grad_norm": 0.8007084131240845,
2633
+ "learning_rate": 9.822623870038838e-05,
2634
+ "loss": 0.8967,
2635
+ "step": 374
2636
+ },
2637
+ {
2638
+ "epoch": 0.4852798447104497,
2639
+ "grad_norm": 0.988571286201477,
2640
+ "learning_rate": 9.82153383643587e-05,
2641
+ "loss": 0.9574,
2642
+ "step": 375
2643
  }
2644
  ],
2645
  "logging_steps": 1,
 
2659
  "attributes": {}
2660
  }
2661
  },
2662
+ "total_flos": 4.19226439385088e+17,
2663
  "train_batch_size": 4,
2664
  "trial_name": null,
2665
  "trial_params": null