joe611 committed
Commit 42b03fc (verified)
1 Parent(s): 4faed36

Training in progress, epoch 150, checkpoint

last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:b1b050b46f4abbe104a662b3e7daff91ad7aa7b0dc5b6b45c2c1e21223b9dd9a
+ oid sha256:e02a36bcf38ef6771ea1c0be938289226a5fe98c33ab1d85ca8a1ca855bb867c
  size 166496880
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:90435ff37114bd6582ded1147a5d07d469037df489412cd78505236316b1afb6
+ oid sha256:416afdc0dbed0a1c50117e8afe577bcc73b29c9418c1d3ed682cff01eb911523
  size 330495866
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:74ff39ce5bc1f6039b31922a6e443ab8d8a8f93d5528fc0d576340ae329fb493
+ oid sha256:ceefbb484abd4ed8dc8842b1277aaf2814ba0b1602ab48247135b7cfe2173c79
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:eb678f76da1c9347406d38fe82346b2ac3acd84e6118cb46f17ee79a3da28612
+ oid sha256:531fa53bf2bb93608e8c8228dd565bd5fe95292cd882c1935da101ac5d413c48
  size 1064
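
The four files above are Git LFS pointer files: each records the LFS spec version, the sha256 oid of the tracked blob, and its size in bytes, so only the oid changes when a checkpoint is overwritten. As a minimal sketch (assuming the checkpoint blob has already been downloaded; the local path is illustrative), the new oid and size can be checked against the actual file like this:

```python
import hashlib
from pathlib import Path

def verify_lfs_pointer(blob_path: str, expected_oid: str, expected_size: int) -> bool:
    """Compare a local file against the sha256 oid and byte size recorded in a Git LFS pointer."""
    path = Path(blob_path)
    if path.stat().st_size != expected_size:
        return False
    digest = hashlib.sha256()
    with path.open("rb") as f:
        # Hash in 1 MiB chunks to avoid loading the whole checkpoint into memory.
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == expected_oid

# oid and size copied from the updated pointer for last-checkpoint/model.safetensors above;
# the local path is an assumption about where the repository was cloned.
print(verify_lfs_pointer(
    "last-checkpoint/model.safetensors",
    "e02a36bcf38ef6771ea1c0be938289226a5fe98c33ab1d85ca8a1ca855bb867c",
    166496880,
))
```
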
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": 0.23615802824497223,
  "best_model_checkpoint": "chickens-composite-403232323232-150-epochs-w-transform-metrics-test/checkpoint-123000",
- "epoch": 149.0,
+ "epoch": 150.0,
  "eval_steps": 500,
- "global_step": 149000,
+ "global_step": 150000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -38643,6 +38643,270 @@
  "eval_samples_per_second": 15.14,
  "eval_steps_per_second": 1.893,
  "step": 149000
+ },
+ {
+ "epoch": 149.01,
+ "grad_norm": 54.10673141479492,
+ "learning_rate": 1.074761413334957e-09,
+ "loss": 0.3406,
+ "step": 149010
+ },
+ {
+ "epoch": 149.04,
+ "grad_norm": 151.46478271484375,
+ "learning_rate": 1.0106134441850712e-09,
+ "loss": 0.2945,
+ "step": 149040
+ },
+ {
+ "epoch": 149.07,
+ "grad_norm": 34.716007232666016,
+ "learning_rate": 9.484389968766882e-10,
+ "loss": 0.3195,
+ "step": 149070
+ },
+ {
+ "epoch": 149.1,
+ "grad_norm": 45.507484436035156,
+ "learning_rate": 8.88238095955174e-10,
+ "loss": 0.2918,
+ "step": 149100
+ },
+ {
+ "epoch": 149.13,
+ "grad_norm": 36.53764724731445,
+ "learning_rate": 8.300107651859623e-10,
+ "loss": 0.3201,
+ "step": 149130
+ },
+ {
+ "epoch": 149.16,
+ "grad_norm": 83.43916320800781,
+ "learning_rate": 7.737570275573314e-10,
+ "loss": 0.2829,
+ "step": 149160
+ },
+ {
+ "epoch": 149.19,
+ "grad_norm": 32.47480773925781,
+ "learning_rate": 7.194769052765171e-10,
+ "loss": 0.354,
+ "step": 149190
+ },
+ {
+ "epoch": 149.22,
+ "grad_norm": 33.71446990966797,
+ "learning_rate": 6.671704197735995e-10,
+ "loss": 0.2461,
+ "step": 149220
+ },
+ {
+ "epoch": 149.25,
+ "grad_norm": 60.157772064208984,
+ "learning_rate": 6.168375916970615e-10,
+ "loss": 0.2918,
+ "step": 149250
+ },
+ {
+ "epoch": 149.28,
+ "grad_norm": 36.07717514038086,
+ "learning_rate": 5.684784409182298e-10,
+ "loss": 0.4074,
+ "step": 149280
+ },
+ {
+ "epoch": 149.31,
+ "grad_norm": 87.16706085205078,
+ "learning_rate": 5.220929865284996e-10,
+ "loss": 0.3109,
+ "step": 149310
+ },
+ {
+ "epoch": 149.34,
+ "grad_norm": 51.461036682128906,
+ "learning_rate": 4.776812468398895e-10,
+ "loss": 0.3124,
+ "step": 149340
+ },
+ {
+ "epoch": 149.37,
+ "grad_norm": 53.12019729614258,
+ "learning_rate": 4.3524323938559655e-10,
+ "loss": 0.26,
+ "step": 149370
+ },
+ {
+ "epoch": 149.4,
+ "grad_norm": 31.98765754699707,
+ "learning_rate": 3.9477898091944135e-10,
+ "loss": 0.2911,
+ "step": 149400
+ },
+ {
+ "epoch": 149.43,
+ "grad_norm": 45.87418746948242,
+ "learning_rate": 3.562884874158679e-10,
+ "loss": 0.3493,
+ "step": 149430
+ },
+ {
+ "epoch": 149.46,
+ "grad_norm": 50.62438201904297,
+ "learning_rate": 3.1977177407105376e-10,
+ "loss": 0.3425,
+ "step": 149460
+ },
+ {
+ "epoch": 149.49,
+ "grad_norm": 118.38064575195312,
+ "learning_rate": 2.8522885530013475e-10,
+ "loss": 0.4231,
+ "step": 149490
+ },
+ {
+ "epoch": 149.52,
+ "grad_norm": 50.225257873535156,
+ "learning_rate": 2.5265974474109054e-10,
+ "loss": 0.2677,
+ "step": 149520
+ },
+ {
+ "epoch": 149.55,
+ "grad_norm": 253.9156951904297,
+ "learning_rate": 2.2206445525085886e-10,
+ "loss": 0.3737,
+ "step": 149550
+ },
+ {
+ "epoch": 149.58,
+ "grad_norm": 46.01321029663086,
+ "learning_rate": 1.9344299890866614e-10,
+ "loss": 0.4259,
+ "step": 149580
+ },
+ {
+ "epoch": 149.61,
+ "grad_norm": 127.60037994384766,
+ "learning_rate": 1.6679538701325215e-10,
+ "loss": 0.3044,
+ "step": 149610
+ },
+ {
+ "epoch": 149.64,
+ "grad_norm": 74.46773529052734,
+ "learning_rate": 1.4212163008509028e-10,
+ "loss": 0.3339,
+ "step": 149640
+ },
+ {
+ "epoch": 149.67,
+ "grad_norm": 92.41695404052734,
+ "learning_rate": 1.1942173786527732e-10,
+ "loss": 0.285,
+ "step": 149670
+ },
+ {
+ "epoch": 149.7,
+ "grad_norm": 43.35792541503906,
+ "learning_rate": 9.869571931442334e-11,
+ "loss": 0.3565,
+ "step": 149700
+ },
+ {
+ "epoch": 149.73,
+ "grad_norm": 63.197906494140625,
+ "learning_rate": 7.994358261542712e-11,
+ "loss": 0.2996,
+ "step": 149730
+ },
+ {
+ "epoch": 149.76,
+ "grad_norm": 38.37726974487305,
+ "learning_rate": 6.316533517125578e-11,
+ "loss": 0.2403,
+ "step": 149760
+ },
+ {
+ "epoch": 149.79,
+ "grad_norm": 30.22950553894043,
+ "learning_rate": 4.83609836054999e-11,
+ "loss": 0.2904,
+ "step": 149790
+ },
+ {
+ "epoch": 149.82,
+ "grad_norm": 50.76008987426758,
+ "learning_rate": 3.55305337634837e-11,
+ "loss": 0.3492,
+ "step": 149820
+ },
+ {
+ "epoch": 149.85,
+ "grad_norm": 64.36622619628906,
+ "learning_rate": 2.467399070893439e-11,
+ "loss": 0.3102,
+ "step": 149850
+ },
+ {
+ "epoch": 149.88,
+ "grad_norm": 34.911922454833984,
+ "learning_rate": 1.57913587295333e-11,
+ "loss": 0.2852,
+ "step": 149880
+ },
+ {
+ "epoch": 149.91,
+ "grad_norm": 77.51419067382812,
+ "learning_rate": 8.882641330809627e-12,
+ "loss": 0.3452,
+ "step": 149910
+ },
+ {
+ "epoch": 149.94,
+ "grad_norm": 37.67948532104492,
+ "learning_rate": 3.947841241136452e-12,
+ "loss": 0.2644,
+ "step": 149940
+ },
+ {
+ "epoch": 149.97,
+ "grad_norm": 105.01229095458984,
+ "learning_rate": 9.869604078449612e-13,
+ "loss": 0.3033,
+ "step": 149970
+ },
+ {
+ "epoch": 150.0,
+ "grad_norm": 62.513954162597656,
+ "learning_rate": 0.0,
+ "loss": 0.2848,
+ "step": 150000
+ },
+ {
+ "epoch": 150.0,
+ "eval_loss": 0.23936401307582855,
+ "eval_map": 0.8413,
+ "eval_map_50": 0.9641,
+ "eval_map_75": 0.9341,
+ "eval_map_chicken": 0.8309,
+ "eval_map_duck": 0.7956,
+ "eval_map_large": 0.8507,
+ "eval_map_medium": 0.8408,
+ "eval_map_plant": 0.8973,
+ "eval_map_small": 0.3268,
+ "eval_mar_1": 0.3376,
+ "eval_mar_10": 0.8711,
+ "eval_mar_100": 0.8749,
+ "eval_mar_100_chicken": 0.8738,
+ "eval_mar_100_duck": 0.8294,
+ "eval_mar_100_plant": 0.9215,
+ "eval_mar_large": 0.881,
+ "eval_mar_medium": 0.8792,
+ "eval_mar_small": 0.3947,
+ "eval_runtime": 14.2916,
+ "eval_samples_per_second": 13.994,
+ "eval_steps_per_second": 1.749,
+ "step": 150000
  }
  ],
  "logging_steps": 30,
@@ -38657,12 +38921,12 @@
  "should_evaluate": false,
  "should_log": false,
  "should_save": true,
- "should_training_stop": false
+ "should_training_stop": true
  },
  "attributes": {}
  }
  },
- "total_flos": 5.12594438234112e+19,
+ "total_flos": 5.160346693632e+19,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null