ben81828 committed on
Commit 44286d7 · verified · 1 Parent(s): 0450f72

Training in progress, step 550, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8ba83473ace8f2ecdbc048dafa2d00257b4fa3a981f66b8f547625be4d8d6a90
+oid sha256:ad0a41144b9d8eea3b3f1de8e9e5e7c14c303c31098798928859d47cbd861a53
 size 18516456
last-checkpoint/global_step550/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f2dd708bce8156f81e78f48af2f45249f50b48b1b10d3cccc754368c848a42c4
+size 27700976
last-checkpoint/global_step550/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e7707a634b8052289bb808cab58425d2c29bb9ab889ca015141f434440020c92
+size 27700976
last-checkpoint/global_step550/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:da189be86b251daa6a3d75c53724aa18a89d2bec0b692343472d0c9f34db7ca9
+size 27700976
last-checkpoint/global_step550/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:51ddb526250554a510bf29668756d7e7bd859ccdb9723fc265e8b2c030c2bc23
+size 27700976
last-checkpoint/global_step550/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e5fae2e4f2b1854438b05b6f32d9c6d08952bdf18b07e6c8719824f3cbb0fa55
+size 411571
last-checkpoint/global_step550/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fbbae66b52b89cb6be888983982f3c0ca6e723bfd1f4512b76672b9dc82aa8e1
+size 411507
last-checkpoint/global_step550/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d37f690bd1d0259ae507a97d5795da4f0e813887d5a4e4978f511ea048053e38
+size 411507
last-checkpoint/global_step550/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:059ca587c41d1e52a95054bc3e00214fd350ff19fc2a534776d29cee2872f2e5
+size 411507
last-checkpoint/latest CHANGED
@@ -1 +1 @@
-global_step500
+global_step550
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bdd1f02cb20d3f4f7e0dd26fea62af57e5e71316163f926a28ed6cf89a9f3777
+oid sha256:ae78313eb528c8d3695eebaf4de3539bd0a0bc6ee18c66af1ee183442f1758a0
 size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fc6d54ba2aa85e2f895439a1b787ec947b848a1c34ea5a3a28821572bf2b9fec
+oid sha256:1b38031f60d9e88601d369ef46bcdcf2b5b03f2cb4ba93853bcb2328df7ebb7c
 size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8b6927d26551cddd8e35b34b43e79bd58f8b6027b6a481bb6a563a3652addeb4
+oid sha256:f58092375c93d237cd0e3149aecfbf83e2acdae46279e07a32920d01cb507e64
 size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f8295b1be8e66b4b30cb905dc48cfc717c027e427937b8142d00ae9de8106c6a
+oid sha256:83cd4bbff9962da7ec6787fcea8d65df7096917f9a5902e249ba7aee8887fe5f
 size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ab7567aeded1bd7ef9f3ba115e57865dd25bb569f9711f33170eb2a51540c216
+oid sha256:6e8b881f6464ee76e192f8a5dbebbec89a38d087d3502270b9c7e6038613f3b2
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 0.8908902406692505,
   "best_model_checkpoint": "saves/CADICA_qwenvl_stenosis_classily_scale4_frozenVision/lora/sft/checkpoint-300",
-  "epoch": 0.25753283543651817,
+  "epoch": 0.28328611898017,
   "eval_steps": 50,
-  "global_step": 500,
+  "global_step": 550,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -897,11 +897,100 @@
       "eval_steps_per_second": 0.874,
       "num_input_tokens_seen": 5848048,
       "step": 500
+    },
+    {
+      "epoch": 0.26010816379088336,
+      "grad_norm": 0.25461397268106634,
+      "learning_rate": 9.736925561061871e-05,
+      "loss": 0.8954,
+      "num_input_tokens_seen": 5906512,
+      "step": 505
+    },
+    {
+      "epoch": 0.2626834921452485,
+      "grad_norm": 0.38602603275675745,
+      "learning_rate": 9.729086208503174e-05,
+      "loss": 0.8927,
+      "num_input_tokens_seen": 5965024,
+      "step": 510
+    },
+    {
+      "epoch": 0.2652588204996137,
+      "grad_norm": 0.150082825225123,
+      "learning_rate": 9.721135012358156e-05,
+      "loss": 0.898,
+      "num_input_tokens_seen": 6023496,
+      "step": 515
+    },
+    {
+      "epoch": 0.2678341488539789,
+      "grad_norm": 0.26881662025899655,
+      "learning_rate": 9.713072160673777e-05,
+      "loss": 0.9016,
+      "num_input_tokens_seen": 6082000,
+      "step": 520
+    },
+    {
+      "epoch": 0.2704094772083441,
+      "grad_norm": 0.5039123575147229,
+      "learning_rate": 9.704897844137673e-05,
+      "loss": 0.8842,
+      "num_input_tokens_seen": 6140480,
+      "step": 525
+    },
+    {
+      "epoch": 0.27298480556270927,
+      "grad_norm": 0.27836945453098666,
+      "learning_rate": 9.696612256073633e-05,
+      "loss": 0.8921,
+      "num_input_tokens_seen": 6198968,
+      "step": 530
+    },
+    {
+      "epoch": 0.2755601339170744,
+      "grad_norm": 0.22936338891946384,
+      "learning_rate": 9.688215592437039e-05,
+      "loss": 0.8979,
+      "num_input_tokens_seen": 6257464,
+      "step": 535
+    },
+    {
+      "epoch": 0.2781354622714396,
+      "grad_norm": 0.396486857609105,
+      "learning_rate": 9.679708051810221e-05,
+      "loss": 0.8951,
+      "num_input_tokens_seen": 6315944,
+      "step": 540
+    },
+    {
+      "epoch": 0.2807107906258048,
+      "grad_norm": 0.4751226662261396,
+      "learning_rate": 9.67108983539777e-05,
+      "loss": 0.9149,
+      "num_input_tokens_seen": 6374408,
+      "step": 545
+    },
+    {
+      "epoch": 0.28328611898017,
+      "grad_norm": 0.26829103885131056,
+      "learning_rate": 9.662361147021779e-05,
+      "loss": 0.9013,
+      "num_input_tokens_seen": 6432936,
+      "step": 550
+    },
+    {
+      "epoch": 0.28328611898017,
+      "eval_loss": 0.9001271724700928,
+      "eval_runtime": 16.9878,
+      "eval_samples_per_second": 3.532,
+      "eval_steps_per_second": 0.883,
+      "num_input_tokens_seen": 6432936,
+      "step": 550
     }
   ],
   "logging_steps": 5,
   "max_steps": 3400,
-  "num_input_tokens_seen": 5848048,
+  "num_input_tokens_seen": 6432936,
   "num_train_epochs": 2,
   "save_steps": 50,
   "stateful_callbacks": {
@@ -916,7 +1005,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 328347984855040.0,
+  "total_flos": 361192817164288.0,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null