iamnguyen committed on
Commit
f8624af
1 Parent(s): 472b702

Training in progress, step 44, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d8660ac4f46748d4fea7d183b030ee476ead541262b87e9cd468abd5cf2fd3f
3
  size 590925768
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b692fb3e97cf7d7f117b648d2d9d5c6cb4303fed1b353b124d68b9bae0ce7af
3
  size 590925768
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:893d356ba86f8ffdcdcad783cb3fa0c7a7eac39b12a727448815b1cd1e8d5645
3
  size 296481140
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8e2c889141e437e95e4167240260c77a3c831959e07afd53c0a940b120467aa
3
  size 296481140
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:280dd73179115c4c161606f8d4213df78626a76fac949f56bf41551396432bdf
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a33d7ef10436adaddd7a5b0ae02a851e8464f7fece53aa135a75d0095ea5afe
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.05844348560600872,
5
  "eval_steps": 500,
6
- "global_step": 40,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -727,6 +727,78 @@
727
  "rewards/margins": -0.0024731969460844994,
728
  "rewards/rejected": -0.13805550336837769,
729
  "step": 40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
730
  }
731
  ],
732
  "logging_steps": 1.0,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.06428783416660959,
5
  "eval_steps": 500,
6
+ "global_step": 44,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
727
  "rewards/margins": -0.0024731969460844994,
728
  "rewards/rejected": -0.13805550336837769,
729
  "step": 40
730
+ },
731
+ {
732
+ "epoch": 0.059904572746158936,
733
+ "grad_norm": 0.5273575186729431,
734
+ "learning_rate": 9.937896060529485e-06,
735
+ "log_odds_chosen": -0.08113294094800949,
736
+ "log_odds_ratio": -0.7743253707885742,
737
+ "logits/chosen": -1.9785962104797363,
738
+ "logits/rejected": -1.9705551862716675,
739
+ "logps/chosen": -1.2821004390716553,
740
+ "logps/rejected": -1.208033561706543,
741
+ "loss": 1.5836,
742
+ "nll_loss": 1.5061376094818115,
743
+ "rewards/accuracies": 0.234375,
744
+ "rewards/chosen": -0.12821003794670105,
745
+ "rewards/margins": -0.007406666409224272,
746
+ "rewards/rejected": -0.12080337107181549,
747
+ "step": 41
748
+ },
749
+ {
750
+ "epoch": 0.061365659886309154,
751
+ "grad_norm": 0.5236508250236511,
752
+ "learning_rate": 9.934197316154721e-06,
753
+ "log_odds_chosen": -0.13003796339035034,
754
+ "log_odds_ratio": -0.7939882278442383,
755
+ "logits/chosen": -1.9101815223693848,
756
+ "logits/rejected": -1.943664312362671,
757
+ "logps/chosen": -1.155348777770996,
758
+ "logps/rejected": -1.0918152332305908,
759
+ "loss": 1.4569,
760
+ "nll_loss": 1.377458095550537,
761
+ "rewards/accuracies": 0.328125,
762
+ "rewards/chosen": -0.11553487926721573,
763
+ "rewards/margins": -0.006353363860398531,
764
+ "rewards/rejected": -0.10918151587247849,
765
+ "step": 42
766
+ },
767
+ {
768
+ "epoch": 0.06282674702645938,
769
+ "grad_norm": 0.4699207842350006,
770
+ "learning_rate": 9.9303923195285e-06,
771
+ "log_odds_chosen": -0.041184213012456894,
772
+ "log_odds_ratio": -0.7406144142150879,
773
+ "logits/chosen": -1.9506869316101074,
774
+ "logits/rejected": -1.9735560417175293,
775
+ "logps/chosen": -1.2942121028900146,
776
+ "logps/rejected": -1.2584986686706543,
777
+ "loss": 1.5464,
778
+ "nll_loss": 1.4723409414291382,
779
+ "rewards/accuracies": 0.4375,
780
+ "rewards/chosen": -0.12942121922969818,
781
+ "rewards/margins": -0.0035713440738618374,
782
+ "rewards/rejected": -0.1258498728275299,
783
+ "step": 43
784
+ },
785
+ {
786
+ "epoch": 0.06428783416660959,
787
+ "grad_norm": 0.5235589742660522,
788
+ "learning_rate": 9.92648115258704e-06,
789
+ "log_odds_chosen": 0.04650488868355751,
790
+ "log_odds_ratio": -0.7010443806648254,
791
+ "logits/chosen": -1.9501118659973145,
792
+ "logits/rejected": -1.9561800956726074,
793
+ "logps/chosen": -1.1499981880187988,
794
+ "logps/rejected": -1.1835464239120483,
795
+ "loss": 1.4224,
796
+ "nll_loss": 1.3523142337799072,
797
+ "rewards/accuracies": 0.4375,
798
+ "rewards/chosen": -0.11499983072280884,
799
+ "rewards/margins": 0.003354821354150772,
800
+ "rewards/rejected": -0.11835464835166931,
801
+ "step": 44
802
  }
803
  ],
804
  "logging_steps": 1.0,