mattbonnell committed on
Commit
8403713
·
verified ·
1 Parent(s): 2715521

Training in progress, step 5500, checkpoint

Browse files
last-checkpoint/global_step5500/mp_rank_00_model_states.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c3bc55a30f02a3f26350bd9dbb16294a15a897b075fd34456a88839ee77910c4
3
  size 197282509
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0cf14511d531f879ee2f6d406031ad511a3c06b18eed385a73b0dc215190115
3
  size 197282509
last-checkpoint/global_step5500/zero_pp_rank_0_mp_rank_00_optim_states.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6cd182af3b4c944245fd020d35a0d824bf71c439f21ecda9cfb84927e3bf4593
3
  size 180416968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1b95be6942dace2bc3d825fa1446c5b063d8b4ee4c7ada108b22b131515ff13
3
  size 180416968
last-checkpoint/global_step5500/zero_pp_rank_1_mp_rank_00_optim_states.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cbdab2ae29a88e68fc1af1995e70903688b507c709051f3fc347179a065e0e55
3
  size 180416776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6346f87e421ad31c9b43ee470eab53306c511165d8d0c10038223ac6d789e927
3
  size 180416776
last-checkpoint/global_step5500/zero_pp_rank_2_mp_rank_00_optim_states.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ffbf390a132b39ce10451e04f633f16d3d9d6dbcf32a1b84272612507c10194d
3
  size 180416776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77cab693c8a4d6e0a3940be216a317089eeeb43f17a2d2d833c433151aad0dc5
3
  size 180416776
last-checkpoint/global_step5500/zero_pp_rank_3_mp_rank_00_optim_states.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e237aedc1ff457bcc7f26444494d869759759d1ba70524cc04e3661daf6c5a5a
3
  size 180416904
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:733ad6e8b2d322d4e9fe62f82fd3e8f7f5bf482a7a6145cc963bda9577b003ad
3
  size 180416904
last-checkpoint/global_step5500/zero_pp_rank_4_mp_rank_00_optim_states.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3f78a218d94aea89b6a2546b76fa9c2b6630eedc525cf3c9cc507aecb2c8cea1
3
  size 180416712
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ededfa3ecb0f275dc715e2d4cfb49f679e813f3991d87dbadd47b80273eed9f3
3
  size 180416712
last-checkpoint/global_step5500/zero_pp_rank_5_mp_rank_00_optim_states.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:96767b9e7ca20ce511539e7c55cb887f1c5f3515e14ce6d12290cd51ccc0b127
3
  size 180417096
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c4d3ed7c0c49b879a17e3ce8fa63eb45569d90d6a482ab717098c73b241d5f4
3
  size 180417096
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step5000
 
1
+ global_step5500
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:57d4fd4308febebec1afff358cf12ab56256227add3b2957e331f5cd2e0691a4
3
  size 188836816
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:706a405a712fb9cea8d3cdff6665196d4ab0a60404fa36039d2d2dfad216e4b0
3
  size 188836816
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e3f3878f9de7b43bbfd935e88b0d1db5f297e106844e639e3ee8f61d23b71537
3
  size 15536
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4637f1f5c1da9bb29154d9963121566ca1c02eb09711378004397eb0d9601a2c
3
  size 15536
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:08dc69792353f98f3975f2146c88ff60332193593dccd2ba5cecd781b9997d67
3
- size 15536
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23c45dd7c491ef8cfd05485f13a9666de8a90413d508f927cbd46a1f5f1b8ffb
3
+ size 15472
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b3d105c1a1aa212e4c659fcde3507788ba2d1192d170b13840512005d89c43c0
3
  size 15536
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38614bdb1925ed297bb20ccfe06b7ec2af4d8a762a5ae888492a250a49f694a7
3
  size 15536
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:67b13c4c348b0a86bf4854b80c140b765c791fc26576cf4cc28641b09a92b2c2
3
  size 15536
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:404fdaaba0be21984fe31d2c6a0497ee60427a53f8ff8c86eca1f8f176009870
3
  size 15536
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a7cca33316c4c01433fe48556b2852d35fc05fe7ebfbd4c36c5bfb4e6f315283
3
- size 15472
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ced3ab1b81f732a49c9788e74bc896f0ee9b2b5818128f9450e4a5af9190e6f2
3
+ size 15536
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:67a8ec020afb0b24788f834efe44fce3daac72b42efaf574b93244a2db48d417
3
- size 15536
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6301d10b310bc985fcae6a7295ae99ac9e593c77c7cac2ef61d2dba1e9541e7d
3
+ size 15600
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2464b12e1ca0e0289d4336d5d6478dcc4211982d590e1532eea5a3218a3c1d09
3
  size 1256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff0c32be67ef9a7f2ea73a46b364adf3b30fe6a1d94a39f03dae5f70dfb3e7d2
3
  size 1256
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 135.13513513513513,
5
  "eval_steps": 1500,
6
- "global_step": 5000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -725,6 +725,76 @@
725
  "learning_rate": 0.0001,
726
  "loss": 0.0331,
727
  "step": 5000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
728
  }
729
  ],
730
  "logging_steps": 50,
@@ -744,7 +814,7 @@
744
  "attributes": {}
745
  }
746
  },
747
- "total_flos": 1.4035163536540303e+20,
748
  "train_batch_size": 64,
749
  "trial_name": null,
750
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 148.64864864864865,
5
  "eval_steps": 1500,
6
+ "global_step": 5500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
725
  "learning_rate": 0.0001,
726
  "loss": 0.0331,
727
  "step": 5000
728
+ },
729
+ {
730
+ "epoch": 136.48648648648648,
731
+ "grad_norm": 0.4285108149051666,
732
+ "learning_rate": 0.0001,
733
+ "loss": 0.0304,
734
+ "step": 5050
735
+ },
736
+ {
737
+ "epoch": 137.83783783783784,
738
+ "grad_norm": 0.3434101343154907,
739
+ "learning_rate": 0.0001,
740
+ "loss": 0.0294,
741
+ "step": 5100
742
+ },
743
+ {
744
+ "epoch": 139.1891891891892,
745
+ "grad_norm": 0.41777992248535156,
746
+ "learning_rate": 0.0001,
747
+ "loss": 0.0302,
748
+ "step": 5150
749
+ },
750
+ {
751
+ "epoch": 140.54054054054055,
752
+ "grad_norm": 0.3897533714771271,
753
+ "learning_rate": 0.0001,
754
+ "loss": 0.0303,
755
+ "step": 5200
756
+ },
757
+ {
758
+ "epoch": 141.8918918918919,
759
+ "grad_norm": 0.3457304537296295,
760
+ "learning_rate": 0.0001,
761
+ "loss": 0.0297,
762
+ "step": 5250
763
+ },
764
+ {
765
+ "epoch": 143.24324324324326,
766
+ "grad_norm": 0.38188374042510986,
767
+ "learning_rate": 0.0001,
768
+ "loss": 0.0291,
769
+ "step": 5300
770
+ },
771
+ {
772
+ "epoch": 144.59459459459458,
773
+ "grad_norm": 0.44426918029785156,
774
+ "learning_rate": 0.0001,
775
+ "loss": 0.0308,
776
+ "step": 5350
777
+ },
778
+ {
779
+ "epoch": 145.94594594594594,
780
+ "grad_norm": 0.46593207120895386,
781
+ "learning_rate": 0.0001,
782
+ "loss": 0.0306,
783
+ "step": 5400
784
+ },
785
+ {
786
+ "epoch": 147.2972972972973,
787
+ "grad_norm": 0.5084848403930664,
788
+ "learning_rate": 0.0001,
789
+ "loss": 0.0293,
790
+ "step": 5450
791
+ },
792
+ {
793
+ "epoch": 148.64864864864865,
794
+ "grad_norm": 0.35385948419570923,
795
+ "learning_rate": 0.0001,
796
+ "loss": 0.0307,
797
+ "step": 5500
798
  }
799
  ],
800
  "logging_steps": 50,
 
814
  "attributes": {}
815
  }
816
  },
817
+ "total_flos": 1.5440555615799634e+20,
818
  "train_batch_size": 64,
819
  "trial_name": null,
820
  "trial_params": null