Rakhman16 committed (verified)
Commit: 8296ff8
Parent: b2e2406

Training in progress, step 4000, checkpoint

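Commits titled "Training in progress, step N, checkpoint" are what a Hugging Face transformers Trainer typically pushes when it saves a checkpoint with Hub syncing enabled. Below is a minimal, hypothetical sketch of such a setup: only eval_steps=100, logging_steps=50, a per-device batch size of 8, the ./fine-tuned output directory, and eval_loss-based best-checkpoint tracking are visible in this commit's trainer_state.json; the save cadence, hub settings, model, and datasets are assumptions.

# Hypothetical reconstruction of the Trainer setup behind this kind of commit.
# Values marked "from trainer_state.json" are visible in this diff; everything
# else (save cadence, hub flag, model/dataset) is an assumption.
from transformers import Trainer, TrainingArguments


def build_trainer(model, train_dataset, eval_dataset):
    args = TrainingArguments(
        output_dir="./fine-tuned",           # from trainer_state.json (best_model_checkpoint prefix)
        per_device_train_batch_size=8,       # from trainer_state.json ("train_batch_size": 8)
        eval_strategy="steps",               # called "evaluation_strategy" on older transformers
        eval_steps=100,                      # from trainer_state.json ("eval_steps": 100)
        logging_steps=50,                    # from trainer_state.json ("logging_steps": 50)
        save_strategy="steps",
        save_steps=500,                      # assumption: checkpoints appear at steps 3500 and 4000
        load_best_model_at_end=True,
        metric_for_best_model="eval_loss",   # best_metric tracks eval_loss in the state file
        greater_is_better=False,
        push_to_hub=True,                    # produces "Training in progress" checkpoint commits
    )
    return Trainer(
        model=model,
        args=args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
    )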
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:869559a5ae5ad7f1c26df10072eb69150c395ca8de50790bd49b4fe2680e9d2a
+oid sha256:708bb2c9822b1d19cfc1f711a1426a731d98c1d2dfede98878aeb66a671c5d3d
 size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:209a99db0577027fa2c361d3c3c432d63c3657ce2b73bf2d7a0cc3bbdd3a0773
+oid sha256:d3e460fa778d644588069da89534b6210f9e1b28688b004d2b112bcf63437271
 size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f8441833b2ccddbdbfb81a5b41b410f598dfb2796bdfdf3689c4bebcaf3da6bb
+oid sha256:c62c604dba9a000f0608f1abd7fa224ff8ff313b4269759d1f9ec8d5f614e346
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:74b133c003556814e78a8921a1c25cb078726e4bad98aa7a030c1c78956cb745
+oid sha256:35a2f311d7dd68d3021f647c351f057c0b1803907e19da4973f0e7cb6dd34438
 size 1064
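All four files above are stored with Git LFS, so each diff only rewrites the small pointer file: the sha256 oid moves to the new checkpoint blob while the byte size stays the same. A short sketch, assuming hypothetical local paths, for checking a downloaded blob against its pointer:

# Verify that a downloaded checkpoint file matches its Git LFS pointer.
# The pointer format (version / oid sha256:<hex> / size <bytes>) is exactly what
# the diffs above show; the local paths used below are hypothetical.
import hashlib
import os


def parse_lfs_pointer(pointer_path):
    """Return (sha256_hex, size_bytes) parsed from a Git LFS pointer file."""
    fields = {}
    with open(pointer_path) as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields["oid"].removeprefix("sha256:"), int(fields["size"])


def verify(pointer_path, blob_path):
    expected_oid, expected_size = parse_lfs_pointer(pointer_path)
    if os.path.getsize(blob_path) != expected_size:
        return False
    digest = hashlib.sha256()
    with open(blob_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == expected_oid


# Example with placeholder paths:
# verify("pointers/model.safetensors", "downloads/model.safetensors")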
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.11222900450229645,
-  "best_model_checkpoint": "./fine-tuned/checkpoint-3500",
-  "epoch": 0.6147361025731097,
+  "best_metric": 0.11094118654727936,
+  "best_model_checkpoint": "./fine-tuned/checkpoint-4000",
+  "epoch": 0.7025555457978396,
   "eval_steps": 100,
-  "global_step": 3500,
+  "global_step": 4000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -777,6 +777,116 @@
       "eval_samples_per_second": 25.501,
       "eval_steps_per_second": 3.191,
       "step": 3500
+    },
+    {
+      "epoch": 0.6235180468955827,
+      "grad_norm": 13531.3447265625,
+      "learning_rate": 2.5323203934656598e-05,
+      "loss": 0.1178,
+      "step": 3550
+    },
+    {
+      "epoch": 0.6322999912180557,
+      "grad_norm": 10695.3486328125,
+      "learning_rate": 2.5257333567539085e-05,
+      "loss": 0.1175,
+      "step": 3600
+    },
+    {
+      "epoch": 0.6322999912180557,
+      "eval_loss": 0.11206092685461044,
+      "eval_runtime": 174.9921,
+      "eval_samples_per_second": 25.487,
+      "eval_steps_per_second": 3.189,
+      "step": 3600
+    },
+    {
+      "epoch": 0.6410819355405287,
+      "grad_norm": 9945.763671875,
+      "learning_rate": 2.5191463200421568e-05,
+      "loss": 0.1122,
+      "step": 3650
+    },
+    {
+      "epoch": 0.6498638798630016,
+      "grad_norm": 21472.962890625,
+      "learning_rate": 2.5125592833304058e-05,
+      "loss": 0.1179,
+      "step": 3700
+    },
+    {
+      "epoch": 0.6498638798630016,
+      "eval_loss": 0.11144611984491348,
+      "eval_runtime": 174.9363,
+      "eval_samples_per_second": 25.495,
+      "eval_steps_per_second": 3.19,
+      "step": 3700
+    },
+    {
+      "epoch": 0.6586458241854747,
+      "grad_norm": 10160.6298828125,
+      "learning_rate": 2.5059722466186545e-05,
+      "loss": 0.1148,
+      "step": 3750
+    },
+    {
+      "epoch": 0.6674277685079476,
+      "grad_norm": 13687.66015625,
+      "learning_rate": 2.4993852099069035e-05,
+      "loss": 0.1166,
+      "step": 3800
+    },
+    {
+      "epoch": 0.6674277685079476,
+      "eval_loss": 0.1111949160695076,
+      "eval_runtime": 174.9122,
+      "eval_samples_per_second": 25.499,
+      "eval_steps_per_second": 3.19,
+      "step": 3800
+    },
+    {
+      "epoch": 0.6762097128304206,
+      "grad_norm": 9961.3818359375,
+      "learning_rate": 2.4927981731951518e-05,
+      "loss": 0.12,
+      "step": 3850
+    },
+    {
+      "epoch": 0.6849916571528937,
+      "grad_norm": 11952.0546875,
+      "learning_rate": 2.4862111364834008e-05,
+      "loss": 0.1227,
+      "step": 3900
+    },
+    {
+      "epoch": 0.6849916571528937,
+      "eval_loss": 0.11115138977766037,
+      "eval_runtime": 175.0741,
+      "eval_samples_per_second": 25.475,
+      "eval_steps_per_second": 3.187,
+      "step": 3900
+    },
+    {
+      "epoch": 0.6937736014753666,
+      "grad_norm": 7658.37255859375,
+      "learning_rate": 2.4796240997716494e-05,
+      "loss": 0.1178,
+      "step": 3950
+    },
+    {
+      "epoch": 0.7025555457978396,
+      "grad_norm": 9078.0087890625,
+      "learning_rate": 2.4730370630598984e-05,
+      "loss": 0.1079,
+      "step": 4000
+    },
+    {
+      "epoch": 0.7025555457978396,
+      "eval_loss": 0.11094118654727936,
+      "eval_runtime": 174.8603,
+      "eval_samples_per_second": 25.506,
+      "eval_steps_per_second": 3.191,
+      "step": 4000
     }
   ],
   "logging_steps": 50,
@@ -796,7 +906,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.705082093568e+16,
+  "total_flos": 1.948665249792e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null
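The trainer_state.json change appends the step 3550-4000 entries to log_history and advances best_metric / best_model_checkpoint from checkpoint-3500 (eval_loss 0.11223) to checkpoint-4000 (eval_loss 0.11094). A short sketch, assuming the checkpoint directory has been downloaded locally, for reading that history back:

# Read the training/eval history recorded in the updated trainer_state.json.
# Key names (log_history, loss, eval_loss, step, best_metric, best_model_checkpoint)
# are the ones visible in the diff above; the local path is hypothetical.
import json

with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

train_points = [(e["step"], e["loss"]) for e in state["log_history"] if "loss" in e]
eval_points = [(e["step"], e["eval_loss"]) for e in state["log_history"] if "eval_loss" in e]

print("best:", state["best_metric"], "->", state["best_model_checkpoint"])
print("latest eval losses:", eval_points[-3:])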