Rakhman16 committed
Commit 37c1796 · verified · 1 Parent(s): 06ca696

Training in progress, step 3500, checkpoint

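This is the kind of commit that the transformers Trainer pushes automatically while a training run is in progress. As a rough sketch only, assuming the repository is updated by Trainer's push-to-hub integration: hub_strategy="checkpoint" is what writes into a last-checkpoint/ folder and titles commits "Training in progress, step N"; the remaining values below are read off the trainer_state.json diff further down or are plain guesses, not the author's confirmed configuration.

    from transformers import TrainingArguments

    # Hypothetical arguments that would produce pushes like this commit.
    # Only the batch size, logging/eval cadence and output_dir are visible in
    # the diff below; save_steps=500 and load_best_model_at_end are assumptions.
    args = TrainingArguments(
        output_dir="./fine-tuned",
        per_device_train_batch_size=8,
        logging_steps=50,
        eval_strategy="steps",
        eval_steps=100,
        save_steps=500,
        load_best_model_at_end=True,
        push_to_hub=True,
        hub_strategy="checkpoint",
    )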
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:44ea6b7a6a2fff075f708703afadf30e3e8c57e3163b493b4645497b86d2384f
+ oid sha256:869559a5ae5ad7f1c26df10072eb69150c395ca8de50790bd49b4fe2680e9d2a
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:f98d95db80478cfe0e9aee8fa102c9b668166fcc2a159d0c1e6bcd8518b7bc45
+ oid sha256:209a99db0577027fa2c361d3c3c432d63c3657ce2b73bf2d7a0cc3bbdd3a0773
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:572b27dd4aaf18aaca2cbee93be2b885a21373fc2b4cd02fc4f0e4185393316a
+ oid sha256:f8441833b2ccddbdbfb81a5b41b410f598dfb2796bdfdf3689c4bebcaf3da6bb
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:4b2b446c840bb8969c73f2b1624f62cc7b296b26a6d87dc45e0852da9ec5c8d2
+ oid sha256:74b133c003556814e78a8921a1c25cb078726e4bad98aa7a030c1c78956cb745
  size 1064
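All four files above are tracked with Git LFS, so the commit only rewrites the small pointer files (spec version, oid sha256 digest, byte size); the actual weight, optimizer, RNG and scheduler blobs are stored on the LFS server. As a minimal sketch, assuming the blobs have already been fetched locally (for example with git lfs pull), a pointer's digest can be checked against the downloaded file:

    import hashlib

    def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
        """Stream a file and return its hex SHA-256 digest."""
        digest = hashlib.sha256()
        with open(path, "rb") as f:
            for chunk in iter(lambda: f.read(chunk_size), b""):
                digest.update(chunk)
        return digest.hexdigest()

    # Expected oid copied from the new model.safetensors pointer above.
    expected = "869559a5ae5ad7f1c26df10072eb69150c395ca8de50790bd49b4fe2680e9d2a"
    print(sha256_of("last-checkpoint/model.safetensors") == expected)

The same check applies to optimizer.pt, rng_state.pth and scheduler.pt with their respective oids.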
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
- "best_metric": 0.11349175125360489,
- "best_model_checkpoint": "./fine-tuned/checkpoint-3000",
- "epoch": 0.5269166593483797,
+ "best_metric": 0.11222900450229645,
+ "best_model_checkpoint": "./fine-tuned/checkpoint-3500",
+ "epoch": 0.6147361025731097,
  "eval_steps": 100,
- "global_step": 3000,
+ "global_step": 3500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -667,6 +667,116 @@
  "eval_samples_per_second": 25.455,
  "eval_steps_per_second": 3.185,
  "step": 3000
+ },
+ {
+ "epoch": 0.5356986036708528,
+ "grad_norm": 9072.705078125,
+ "learning_rate": 2.5981907605831725e-05,
+ "loss": 0.1249,
+ "step": 3050
+ },
+ {
+ "epoch": 0.5444805479933257,
+ "grad_norm": 11936.0400390625,
+ "learning_rate": 2.591603723871421e-05,
+ "loss": 0.1205,
+ "step": 3100
+ },
+ {
+ "epoch": 0.5444805479933257,
+ "eval_loss": 0.11320458352565765,
+ "eval_runtime": 174.8633,
+ "eval_samples_per_second": 25.506,
+ "eval_steps_per_second": 3.191,
+ "step": 3100
+ },
+ {
+ "epoch": 0.5532624923157987,
+ "grad_norm": 11488.6748046875,
+ "learning_rate": 2.58501668715967e-05,
+ "loss": 0.1202,
+ "step": 3150
+ },
+ {
+ "epoch": 0.5620444366382717,
+ "grad_norm": 12126.583984375,
+ "learning_rate": 2.5784296504479185e-05,
+ "loss": 0.12,
+ "step": 3200
+ },
+ {
+ "epoch": 0.5620444366382717,
+ "eval_loss": 0.11316446959972382,
+ "eval_runtime": 174.862,
+ "eval_samples_per_second": 25.506,
+ "eval_steps_per_second": 3.191,
+ "step": 3200
+ },
+ {
+ "epoch": 0.5708263809607447,
+ "grad_norm": 14138.3876953125,
+ "learning_rate": 2.5718426137361675e-05,
+ "loss": 0.1272,
+ "step": 3250
+ },
+ {
+ "epoch": 0.5796083252832177,
+ "grad_norm": 6703.96728515625,
+ "learning_rate": 2.5652555770244158e-05,
+ "loss": 0.1245,
+ "step": 3300
+ },
+ {
+ "epoch": 0.5796083252832177,
+ "eval_loss": 0.11248895525932312,
+ "eval_runtime": 175.0938,
+ "eval_samples_per_second": 25.472,
+ "eval_steps_per_second": 3.187,
+ "step": 3300
+ },
+ {
+ "epoch": 0.5883902696056907,
+ "grad_norm": 14042.6962890625,
+ "learning_rate": 2.5586685403126648e-05,
+ "loss": 0.1135,
+ "step": 3350
+ },
+ {
+ "epoch": 0.5971722139281637,
+ "grad_norm": 11223.4375,
+ "learning_rate": 2.5520815036009135e-05,
+ "loss": 0.1192,
+ "step": 3400
+ },
+ {
+ "epoch": 0.5971722139281637,
+ "eval_loss": 0.11230960488319397,
+ "eval_runtime": 175.0029,
+ "eval_samples_per_second": 25.485,
+ "eval_steps_per_second": 3.189,
+ "step": 3400
+ },
+ {
+ "epoch": 0.6059541582506367,
+ "grad_norm": 10186.056640625,
+ "learning_rate": 2.5454944668891625e-05,
+ "loss": 0.1133,
+ "step": 3450
+ },
+ {
+ "epoch": 0.6147361025731097,
+ "grad_norm": 11724.2939453125,
+ "learning_rate": 2.5389074301774108e-05,
+ "loss": 0.1191,
+ "step": 3500
+ },
+ {
+ "epoch": 0.6147361025731097,
+ "eval_loss": 0.11222900450229645,
+ "eval_runtime": 174.8917,
+ "eval_samples_per_second": 25.501,
+ "eval_steps_per_second": 3.191,
+ "step": 3500
  }
  ],
  "logging_steps": 50,
@@ -686,7 +796,7 @@
  "attributes": {}
  }
  },
- "total_flos": 1.461498937344e+16,
+ "total_flos": 1.705082093568e+16,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null