ToastyPigeon committed
Commit 1ff3783
1 Parent(s): 0b614cb

Training in progress, step 120, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:5cd39093da46c760bf0ad6acee7d3afbdc458b320613d02a1b9b87bf1dfd3d57
+ oid sha256:2b3445853db2177d069f37119bdbd70bd0ba40586305619d4889c69fce58f4e2
  size 763470136
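The adapter weights above are stored as a Git LFS pointer (version, oid, size); only the sha256 oid changes in this commit, the payload size stays the same. As a minimal sketch (the local path is illustrative, not part of the commit), the resolved file can be checked against the new pointer like this:

```python
# Minimal sketch: check a downloaded LFS object against its pointer's
# sha256 oid and size. The path below is illustrative only.
import hashlib
from pathlib import Path

def sha256_of(path: Path, chunk_size: int = 1 << 20) -> str:
    h = hashlib.sha256()
    with path.open("rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

blob = Path("last-checkpoint/adapter_model.safetensors")  # resolved file, not the pointer
assert blob.stat().st_size == 763470136
assert sha256_of(blob) == "2b3445853db2177d069f37119bdbd70bd0ba40586305619d4889c69fce58f4e2"
```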
last-checkpoint/global_step120/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5c2455bdbf7ed8211f4bca0afdca943386a46d27f3fd0bb34935bdc2f028024f
+ size 1152331664
last-checkpoint/global_step120/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:034bf708f022804840c2a7a2a35e2eaf1b23a7b6e3eb07cd9831ba9fb2c905eb
+ size 1152331664
last-checkpoint/global_step120/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:78e966d883cf2dbb7a88ea91bee8a0e48df3eeca5f8e29b5276b0b83199dd6b2
+ size 348711830
last-checkpoint/global_step120/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:85b5ed1170e353a722624f40f020b904997b3571404b9c60761792611dc32657
+ size 348711830
last-checkpoint/latest CHANGED
@@ -1 +1 @@
- global_step90
+ global_step120
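The `latest` file is DeepSpeed's tag pointer; this commit moves it from `global_step90` to `global_step120`, the directory holding the per-rank `bf16_zero_pp_rank_*_optim_states.pt` and `zero_pp_rank_*_model_states.pt` shards added above. A hedged sketch, assuming DeepSpeed is installed, for consolidating those ZeRO shards into a single fp32 state dict:

```python
# Sketch (assumes DeepSpeed is available): merge the ZeRO shards under
# last-checkpoint/global_step120/ into a single fp32 state dict.
# With tag=None the helper reads the `latest` file updated by this commit.
from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint

state_dict = get_fp32_state_dict_from_zero_checkpoint("last-checkpoint", tag=None)
print(f"{len(state_dict)} consolidated parameter tensors")
```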
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:402dc8f05e787a8bd1f6017a51a44dd402264604e6e8b23372c047903d00e275
+ oid sha256:6e581c9b63b766f1f594238a5cca71c1532d5b91bc7cec6c92f09a1bc37dbbb3
  size 14512
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:385e4b7d88dae501cd7416828b65c0f6787ac9f75fe1a28f84bf64ac6675909d
+ oid sha256:d032b1956d2c5ccbb0c6a5d0103db1f906cfb41ee1cfca2520e952693334eddb
  size 14512
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:ac3936d03564e869c0f8be0c0a24855f0fbaa9d17a65502bfe26d2b6f28ba7ad
+ oid sha256:05b4ad7a9c00749c967e1489fcc0f9309722f8de3a5b956bd6a3ec0903f70bf4
  size 1064
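`scheduler.pt` holds the learning-rate scheduler state that pairs with the logged learning rates below. A small sketch to inspect it; the key names shown (`last_epoch`, `_last_lr`) are typical of PyTorch LR schedulers and are assumptions here, not something this diff records:

```python
# Sketch: inspect the saved LR scheduler state. Key names are assumptions
# that depend on the scheduler class used for training.
import torch

sched_state = torch.load("last-checkpoint/scheduler.pt", map_location="cpu")
print(sched_state.get("last_epoch"), sched_state.get("_last_lr"))
```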
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.6010016694490818,
+ "epoch": 0.8013355592654424,
  "eval_steps": 30,
- "global_step": 90,
+ "global_step": 120,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -669,6 +669,224 @@
  "eval_samples_per_second": 0.314,
  "eval_steps_per_second": 0.157,
  "step": 90
+ },
+ {
+ "epoch": 0.6076794657762938,
+ "grad_norm": 0.6753979170246298,
+ "learning_rate": 2.200234086189738e-05,
+ "loss": 1.2537,
+ "step": 91
+ },
+ {
+ "epoch": 0.6143572621035058,
+ "grad_norm": 0.7571262328179167,
+ "learning_rate": 2.1514170974749814e-05,
+ "loss": 1.3105,
+ "step": 92
+ },
+ {
+ "epoch": 0.6210350584307178,
+ "grad_norm": 0.7480070184420193,
+ "learning_rate": 2.1029015132779617e-05,
+ "loss": 1.3479,
+ "step": 93
+ },
+ {
+ "epoch": 0.6277128547579299,
+ "grad_norm": 0.9346466324934937,
+ "learning_rate": 2.054711762656369e-05,
+ "loss": 1.26,
+ "step": 94
+ },
+ {
+ "epoch": 0.6343906510851419,
+ "grad_norm": 0.735894594411153,
+ "learning_rate": 2.006872110600875e-05,
+ "loss": 1.1731,
+ "step": 95
+ },
+ {
+ "epoch": 0.6410684474123539,
+ "grad_norm": 0.46805067393219063,
+ "learning_rate": 1.959406645816979e-05,
+ "loss": 1.0025,
+ "step": 96
+ },
+ {
+ "epoch": 0.6477462437395659,
+ "grad_norm": 0.9111162797078164,
+ "learning_rate": 1.9123392685956238e-05,
+ "loss": 1.5097,
+ "step": 97
+ },
+ {
+ "epoch": 0.654424040066778,
+ "grad_norm": 0.6156378595227986,
+ "learning_rate": 1.8656936787786722e-05,
+ "loss": 1.6741,
+ "step": 98
+ },
+ {
+ "epoch": 0.66110183639399,
+ "grad_norm": 0.5608580027505492,
+ "learning_rate": 1.8194933638253293e-05,
+ "loss": 1.555,
+ "step": 99
+ },
+ {
+ "epoch": 0.667779632721202,
+ "grad_norm": 0.5554367825501167,
+ "learning_rate": 1.7737615869854944e-05,
+ "loss": 1.1984,
+ "step": 100
+ },
+ {
+ "epoch": 0.674457429048414,
+ "grad_norm": 0.6763768384451961,
+ "learning_rate": 1.72852137558602e-05,
+ "loss": 1.3085,
+ "step": 101
+ },
+ {
+ "epoch": 0.6811352253756261,
+ "grad_norm": 0.6496940883289803,
+ "learning_rate": 1.6837955094357533e-05,
+ "loss": 1.3538,
+ "step": 102
+ },
+ {
+ "epoch": 0.6878130217028381,
+ "grad_norm": 1.2157878682730905,
+ "learning_rate": 1.63960650935522e-05,
+ "loss": 1.3221,
+ "step": 103
+ },
+ {
+ "epoch": 0.6944908180300501,
+ "grad_norm": 0.6089604075942624,
+ "learning_rate": 1.5959766258367115e-05,
+ "loss": 1.2486,
+ "step": 104
+ },
+ {
+ "epoch": 0.7011686143572621,
+ "grad_norm": 0.5711464513081075,
+ "learning_rate": 1.552927827840493e-05,
+ "loss": 1.4322,
+ "step": 105
+ },
+ {
+ "epoch": 0.7078464106844741,
+ "grad_norm": 1.0441092347273087,
+ "learning_rate": 1.5104817917327696e-05,
+ "loss": 1.2896,
+ "step": 106
+ },
+ {
+ "epoch": 0.7145242070116862,
+ "grad_norm": 0.7066829352817637,
+ "learning_rate": 1.468659890370983e-05,
+ "loss": 1.32,
+ "step": 107
+ },
+ {
+ "epoch": 0.7212020033388982,
+ "grad_norm": 0.8630447275071343,
+ "learning_rate": 1.427483182341936e-05,
+ "loss": 1.1892,
+ "step": 108
+ },
+ {
+ "epoch": 0.7278797996661102,
+ "grad_norm": 0.6704662442015654,
+ "learning_rate": 1.3869724013581556e-05,
+ "loss": 1.0981,
+ "step": 109
+ },
+ {
+ "epoch": 0.7345575959933222,
+ "grad_norm": 0.6697560044361581,
+ "learning_rate": 1.3471479458178499e-05,
+ "loss": 1.431,
+ "step": 110
+ },
+ {
+ "epoch": 0.7412353923205343,
+ "grad_norm": 0.6301335224370612,
+ "learning_rate": 1.3080298685336958e-05,
+ "loss": 1.1401,
+ "step": 111
+ },
+ {
+ "epoch": 0.7479131886477463,
+ "grad_norm": 0.661516675824236,
+ "learning_rate": 1.2696378666356468e-05,
+ "loss": 1.3862,
+ "step": 112
+ },
+ {
+ "epoch": 0.7545909849749582,
+ "grad_norm": 0.8883090872439409,
+ "learning_rate": 1.2319912716528328e-05,
+ "loss": 1.3937,
+ "step": 113
+ },
+ {
+ "epoch": 0.7612687813021702,
+ "grad_norm": 0.618788476335029,
+ "learning_rate": 1.1951090397795546e-05,
+ "loss": 1.3605,
+ "step": 114
+ },
+ {
+ "epoch": 0.7679465776293823,
+ "grad_norm": 1.4912422269122867,
+ "learning_rate": 1.1590097423302684e-05,
+ "loss": 1.3289,
+ "step": 115
+ },
+ {
+ "epoch": 0.7746243739565943,
+ "grad_norm": 0.7244062524660102,
+ "learning_rate": 1.1237115563883693e-05,
+ "loss": 1.2132,
+ "step": 116
+ },
+ {
+ "epoch": 0.7813021702838063,
+ "grad_norm": 0.5640894490902431,
+ "learning_rate": 1.0892322556534839e-05,
+ "loss": 1.35,
+ "step": 117
+ },
+ {
+ "epoch": 0.7879799666110183,
+ "grad_norm": 1.0680883572048787,
+ "learning_rate": 1.0555892014918756e-05,
+ "loss": 1.1928,
+ "step": 118
+ },
+ {
+ "epoch": 0.7946577629382304,
+ "grad_norm": 0.5835392499890589,
+ "learning_rate": 1.022799334194475e-05,
+ "loss": 1.2834,
+ "step": 119
+ },
+ {
+ "epoch": 0.8013355592654424,
+ "grad_norm": 0.9118286380479439,
+ "learning_rate": 9.90879164446933e-06,
+ "loss": 1.3667,
+ "step": 120
+ },
+ {
+ "epoch": 0.8013355592654424,
+ "eval_loss": 0.9900997877120972,
+ "eval_runtime": 319.025,
+ "eval_samples_per_second": 0.313,
+ "eval_steps_per_second": 0.157,
+ "step": 120
  }
  ],
  "logging_steps": 1,
@@ -688,7 +906,7 @@
  "attributes": {}
  }
  },
- "total_flos": 1.0736256317128704e+16,
+ "total_flos": 1.4315053922648064e+16,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null