ben81828 committed on
Commit 26e5e38 · verified · 1 Parent(s): 4649441

Training in progress, step 550, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:32d6d14f8c29da8cdb8be1658c159a9d27e00215388ecaf0c21dc7d9ce80b66e
+oid sha256:8ea0b12291caef0384f7fd3bc0b1e4fc7815f7b867e0e565a267ef13238fd6a9
 size 29034840
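
Every binary in this checkpoint is stored as a Git LFS pointer: three lines giving the spec version, the sha256 oid of the blob, and its size in bytes. Between step 500 and step 550 only the oid of adapter_model.safetensors changes; the size stays at 29034840 bytes because the LoRA adapter's parameter shapes are fixed. A minimal sketch for reading such a pointer (standard library only; the helper name is illustrative, and it assumes the file has not been smudged into the full binary):

# Parse a Git LFS pointer file (version / oid / size) like the ones above.
def read_lfs_pointer(path: str) -> dict:
    fields = {}
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            if key:
                fields[key] = value
    return fields

ptr = read_lfs_pointer("last-checkpoint/adapter_model.safetensors")
print(ptr["oid"], ptr["size"])  # sha256:8ea0b122... 29034840
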
last-checkpoint/global_step550/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:404b0fc604b3f7b9510dc1d2d9f73378777d3cc1650ecf62e6013178b86df089
+size 43429616
last-checkpoint/global_step550/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0ddf6c2b7fc0914636775966ff46b52649452ca9677fd31adc8ef827ee8c46f8
+size 43429616
last-checkpoint/global_step550/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dbd201e14e8bc36621608ee22a4dd0a7e7fa64ba00e281c3405ea9a3f6ed49f1
+size 43429616
last-checkpoint/global_step550/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:211f53152ca6696db7f31ba8043540fd487a13aae01fe3c3247100d7d8d34386
+size 43429616
last-checkpoint/global_step550/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a1f1e88f3bf0a1ab4f6fdcbce21f2206f381a04328feda2f67246fb4023f22be
+size 637299
last-checkpoint/global_step550/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:096829d7819569a0a45c1f1ccd20ed09f7c621736d3582368c85eb1f4d51d913
+size 637171
last-checkpoint/global_step550/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a5fa69fa8e76e9121e0fe023594a90145c030a6aa13d4e5007cb9f1a4268ed73
+size 637171
last-checkpoint/global_step550/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b15fb48a94b40a9b7267e646ab4d8caeff42e109740ea9800d79bc394acc6b40
+size 637171
last-checkpoint/latest CHANGED
@@ -1 +1 @@
-global_step500
+global_step550
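
The new global_step550 directory holds the DeepSpeed ZeRO shards for this step: one bf16 optimizer-state file and one model-state file per data-parallel rank (four ranks here), while the plain-text latest file is bumped from global_step500 to global_step550 so a resumed run picks up the newest tag. A hedged sketch for consolidating those shards into a single fp32 state dict, assuming DeepSpeed is installed and that its zero_to_fp32 helper supports these bf16 shard files (true for recent releases):

# Consolidate the four per-rank ZeRO shards saved under
# last-checkpoint/global_step550 into one fp32 state dict.
from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint

# When tag is omitted, the helper reads it from the "latest" file, which this
# commit updates from global_step500 to global_step550.
state_dict = get_fp32_state_dict_from_zero_checkpoint(
    "last-checkpoint", tag="global_step550"
)
print(f"{len(state_dict)} consolidated tensors")
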
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bdd1f02cb20d3f4f7e0dd26fea62af57e5e71316163f926a28ed6cf89a9f3777
+oid sha256:ae78313eb528c8d3695eebaf4de3539bd0a0bc6ee18c66af1ee183442f1758a0
 size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fc6d54ba2aa85e2f895439a1b787ec947b848a1c34ea5a3a28821572bf2b9fec
+oid sha256:1b38031f60d9e88601d369ef46bcdcf2b5b03f2cb4ba93853bcb2328df7ebb7c
 size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8b6927d26551cddd8e35b34b43e79bd58f8b6027b6a481bb6a563a3652addeb4
+oid sha256:f58092375c93d237cd0e3149aecfbf83e2acdae46279e07a32920d01cb507e64
 size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f8295b1be8e66b4b30cb905dc48cfc717c027e427937b8142d00ae9de8106c6a
+oid sha256:83cd4bbff9962da7ec6787fcea8d65df7096917f9a5902e249ba7aee8887fe5f
 size 15024
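
The four rng_state_*.pth files snapshot the per-rank random-number-generator states so that resuming from step 550 reproduces data shuffling and dropout exactly. A sketch for inspecting one of them; the exact dictionary keys depend on the transformers version, so the ones named in the comment are an assumption:

# Inspect the rank-0 RNG snapshot. Typical keys are "python", "numpy",
# "cpu" and "cuda", but this is version-dependent (an assumption here).
import torch

# weights_only=False because the file stores plain Python objects, not just tensors.
rng = torch.load("last-checkpoint/rng_state_0.pth", weights_only=False)
print(sorted(rng.keys()))
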
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:938da5f0b93537debcb2a90ed511f9cb6684c34d5cc24b3eeea4c7db619e8fb7
+oid sha256:48642e777392e25274bb934c3caefd33d14bddceae2e006daf244ac2f6537412
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 0.6505001187324524,
   "best_model_checkpoint": "saves/CADICA_qwenvl_direction_then_DetectAndClassify_scale6/lora/sft/checkpoint-500",
-  "epoch": 0.14771048744460857,
+  "epoch": 0.16248153618906944,
   "eval_steps": 50,
-  "global_step": 500,
+  "global_step": 550,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -897,11 +897,100 @@
       "eval_steps_per_second": 0.792,
       "num_input_tokens_seen": 5183664,
       "step": 500
+    },
+    {
+      "epoch": 0.14918759231905465,
+      "grad_norm": 11.975231950682153,
+      "learning_rate": 9.983569116887128e-05,
+      "loss": 0.573,
+      "num_input_tokens_seen": 5234920,
+      "step": 505
+    },
+    {
+      "epoch": 0.15066469719350073,
+      "grad_norm": 13.701425113518031,
+      "learning_rate": 9.982564982146327e-05,
+      "loss": 0.6261,
+      "num_input_tokens_seen": 5287312,
+      "step": 510
+    },
+    {
+      "epoch": 0.15214180206794684,
+      "grad_norm": 6.956866723254775,
+      "learning_rate": 9.981531121472811e-05,
+      "loss": 0.6072,
+      "num_input_tokens_seen": 5340240,
+      "step": 515
+    },
+    {
+      "epoch": 0.1536189069423929,
+      "grad_norm": 5.293847645949678,
+      "learning_rate": 9.980467541034584e-05,
+      "loss": 0.565,
+      "num_input_tokens_seen": 5392600,
+      "step": 520
+    },
+    {
+      "epoch": 0.155096011816839,
+      "grad_norm": 3.5333148010719357,
+      "learning_rate": 9.979374247176956e-05,
+      "loss": 0.6188,
+      "num_input_tokens_seen": 5445168,
+      "step": 525
+    },
+    {
+      "epoch": 0.15657311669128507,
+      "grad_norm": 2.715838950258193,
+      "learning_rate": 9.978251246422505e-05,
+      "loss": 0.6069,
+      "num_input_tokens_seen": 5496384,
+      "step": 530
+    },
+    {
+      "epoch": 0.15805022156573117,
+      "grad_norm": 7.400638197441027,
+      "learning_rate": 9.977098545471046e-05,
+      "loss": 0.5805,
+      "num_input_tokens_seen": 5548264,
+      "step": 535
+    },
+    {
+      "epoch": 0.15952732644017725,
+      "grad_norm": 8.936418653401088,
+      "learning_rate": 9.975916151199579e-05,
+      "loss": 0.6383,
+      "num_input_tokens_seen": 5599216,
+      "step": 540
+    },
+    {
+      "epoch": 0.16100443131462333,
+      "grad_norm": 7.142901090509074,
+      "learning_rate": 9.974704070662254e-05,
+      "loss": 0.5845,
+      "num_input_tokens_seen": 5650816,
+      "step": 545
+    },
+    {
+      "epoch": 0.16248153618906944,
+      "grad_norm": 18.523556086651276,
+      "learning_rate": 9.973462311090336e-05,
+      "loss": 0.5957,
+      "num_input_tokens_seen": 5703016,
+      "step": 550
+    },
+    {
+      "epoch": 0.16248153618906944,
+      "eval_loss": 0.6883422136306763,
+      "eval_runtime": 19.183,
+      "eval_samples_per_second": 3.128,
+      "eval_steps_per_second": 0.782,
+      "num_input_tokens_seen": 5703016,
+      "step": 550
     }
   ],
   "logging_steps": 5,
   "max_steps": 6770,
-  "num_input_tokens_seen": 5183664,
+  "num_input_tokens_seen": 5703016,
   "num_train_epochs": 2,
   "save_steps": 50,
   "stateful_callbacks": {
@@ -916,7 +1005,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 341890836201472.0,
+  "total_flos": 376097589690368.0,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null