ncbateman commited on
Commit
ac28f03
·
verified ·
1 Parent(s): ceea410

Training in progress, step 520, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:10275f345ea0979d1b901ec83beafa54f10605a1616e6e45aaa11cec9a48b9f5
3
  size 97307544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e50a11a2f59ac5336df1f861c9746e3ea52482c3b25e71230c372afe31efafd
3
  size 97307544
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1ee5974f4d7d08ab198cdf6267e534b797bb4e62af773544e265e26ba4912e97
3
  size 49846644
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc1656d9776aa288a9d3c783e91c4974f0bbbcdbed454e7021b914cb6025fb87
3
  size 49846644
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f5436483a49c337b81cda06ce61fa904bc4ee9904d9cddffb112a275af24fded
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cdcc456807bf80d9919c70d11abc1b5d0f53d48b9062054a56c7c3a7e1457747
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:811e492420306f2812d71dfa20a74673a76b05941954bb57f6f8b780cb51e734
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18af6c6b5b006201434fd2ba8945bc39086f249834bda28bb96a3935a9852f63
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.6664509867356843,
5
  "eval_steps": 386,
6
- "global_step": 515,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3628,6 +3628,41 @@
3628
  "learning_rate": 9.636948197292052e-05,
3629
  "loss": 0.8313,
3630
  "step": 515
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3631
  }
3632
  ],
3633
  "logging_steps": 1,
@@ -3647,7 +3682,7 @@
3647
  "attributes": {}
3648
  }
3649
  },
3650
- "total_flos": 5.757376434221875e+17,
3651
  "train_batch_size": 4,
3652
  "trial_name": null,
3653
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.6729213846651569,
5
  "eval_steps": 386,
6
+ "global_step": 520,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3628
  "learning_rate": 9.636948197292052e-05,
3629
  "loss": 0.8313,
3630
  "step": 515
3631
+ },
3632
+ {
3633
+ "epoch": 0.6677450663215788,
3634
+ "grad_norm": 0.7210227251052856,
3635
+ "learning_rate": 9.635404285252777e-05,
3636
+ "loss": 0.8593,
3637
+ "step": 516
3638
+ },
3639
+ {
3640
+ "epoch": 0.6690391459074733,
3641
+ "grad_norm": 0.7367059588432312,
3642
+ "learning_rate": 9.633857221567815e-05,
3643
+ "loss": 0.9743,
3644
+ "step": 517
3645
+ },
3646
+ {
3647
+ "epoch": 0.6703332254933678,
3648
+ "grad_norm": 0.8669779896736145,
3649
+ "learning_rate": 9.632307007289027e-05,
3650
+ "loss": 0.8914,
3651
+ "step": 518
3652
+ },
3653
+ {
3654
+ "epoch": 0.6716273050792624,
3655
+ "grad_norm": 0.700624406337738,
3656
+ "learning_rate": 9.630753643470416e-05,
3657
+ "loss": 0.9032,
3658
+ "step": 519
3659
+ },
3660
+ {
3661
+ "epoch": 0.6729213846651569,
3662
+ "grad_norm": 0.81085205078125,
3663
+ "learning_rate": 9.629197131168124e-05,
3664
+ "loss": 0.8612,
3665
+ "step": 520
3666
  }
3667
  ],
3668
  "logging_steps": 1,
 
3682
  "attributes": {}
3683
  }
3684
  },
3685
+ "total_flos": 5.813273292806554e+17,
3686
  "train_batch_size": 4,
3687
  "trial_name": null,
3688
  "trial_params": null