Training in progress, step 550, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step550/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step550/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step550/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step550/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step550/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step550/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step550/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step550/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +93 -4
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 18516456
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ad0a41144b9d8eea3b3f1de8e9e5e7c14c303c31098798928859d47cbd861a53
|
3 |
size 18516456
|
last-checkpoint/global_step550/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f2dd708bce8156f81e78f48af2f45249f50b48b1b10d3cccc754368c848a42c4
|
3 |
+
size 27700976
|
last-checkpoint/global_step550/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e7707a634b8052289bb808cab58425d2c29bb9ab889ca015141f434440020c92
|
3 |
+
size 27700976
|
last-checkpoint/global_step550/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:da189be86b251daa6a3d75c53724aa18a89d2bec0b692343472d0c9f34db7ca9
|
3 |
+
size 27700976
|
last-checkpoint/global_step550/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:51ddb526250554a510bf29668756d7e7bd859ccdb9723fc265e8b2c030c2bc23
|
3 |
+
size 27700976
|
last-checkpoint/global_step550/zero_pp_rank_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e5fae2e4f2b1854438b05b6f32d9c6d08952bdf18b07e6c8719824f3cbb0fa55
|
3 |
+
size 411571
|
last-checkpoint/global_step550/zero_pp_rank_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fbbae66b52b89cb6be888983982f3c0ca6e723bfd1f4512b76672b9dc82aa8e1
|
3 |
+
size 411507
|
last-checkpoint/global_step550/zero_pp_rank_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d37f690bd1d0259ae507a97d5795da4f0e813887d5a4e4978f511ea048053e38
|
3 |
+
size 411507
|
last-checkpoint/global_step550/zero_pp_rank_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:059ca587c41d1e52a95054bc3e00214fd350ff19fc2a534776d29cee2872f2e5
|
3 |
+
size 411507
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step550
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ae78313eb528c8d3695eebaf4de3539bd0a0bc6ee18c66af1ee183442f1758a0
|
3 |
size 15024
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1b38031f60d9e88601d369ef46bcdcf2b5b03f2cb4ba93853bcb2328df7ebb7c
|
3 |
size 15024
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f58092375c93d237cd0e3149aecfbf83e2acdae46279e07a32920d01cb507e64
|
3 |
size 15024
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:83cd4bbff9962da7ec6787fcea8d65df7096917f9a5902e249ba7aee8887fe5f
|
3 |
size 15024
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6e8b881f6464ee76e192f8a5dbebbec89a38d087d3502270b9c7e6038613f3b2
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": 0.8908902406692505,
|
3 |
"best_model_checkpoint": "saves/CADICA_qwenvl_stenosis_classily_scale4_frozenVision/lora/sft/checkpoint-300",
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 50,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -897,11 +897,100 @@
|
|
897 |
"eval_steps_per_second": 0.874,
|
898 |
"num_input_tokens_seen": 5848048,
|
899 |
"step": 500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
900 |
}
|
901 |
],
|
902 |
"logging_steps": 5,
|
903 |
"max_steps": 3400,
|
904 |
-
"num_input_tokens_seen":
|
905 |
"num_train_epochs": 2,
|
906 |
"save_steps": 50,
|
907 |
"stateful_callbacks": {
|
@@ -916,7 +1005,7 @@
|
|
916 |
"attributes": {}
|
917 |
}
|
918 |
},
|
919 |
-
"total_flos":
|
920 |
"train_batch_size": 1,
|
921 |
"trial_name": null,
|
922 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": 0.8908902406692505,
|
3 |
"best_model_checkpoint": "saves/CADICA_qwenvl_stenosis_classily_scale4_frozenVision/lora/sft/checkpoint-300",
|
4 |
+
"epoch": 0.28328611898017,
|
5 |
"eval_steps": 50,
|
6 |
+
"global_step": 550,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
897 |
"eval_steps_per_second": 0.874,
|
898 |
"num_input_tokens_seen": 5848048,
|
899 |
"step": 500
|
900 |
+
},
|
901 |
+
{
|
902 |
+
"epoch": 0.26010816379088336,
|
903 |
+
"grad_norm": 0.25461397268106634,
|
904 |
+
"learning_rate": 9.736925561061871e-05,
|
905 |
+
"loss": 0.8954,
|
906 |
+
"num_input_tokens_seen": 5906512,
|
907 |
+
"step": 505
|
908 |
+
},
|
909 |
+
{
|
910 |
+
"epoch": 0.2626834921452485,
|
911 |
+
"grad_norm": 0.38602603275675745,
|
912 |
+
"learning_rate": 9.729086208503174e-05,
|
913 |
+
"loss": 0.8927,
|
914 |
+
"num_input_tokens_seen": 5965024,
|
915 |
+
"step": 510
|
916 |
+
},
|
917 |
+
{
|
918 |
+
"epoch": 0.2652588204996137,
|
919 |
+
"grad_norm": 0.150082825225123,
|
920 |
+
"learning_rate": 9.721135012358156e-05,
|
921 |
+
"loss": 0.898,
|
922 |
+
"num_input_tokens_seen": 6023496,
|
923 |
+
"step": 515
|
924 |
+
},
|
925 |
+
{
|
926 |
+
"epoch": 0.2678341488539789,
|
927 |
+
"grad_norm": 0.26881662025899655,
|
928 |
+
"learning_rate": 9.713072160673777e-05,
|
929 |
+
"loss": 0.9016,
|
930 |
+
"num_input_tokens_seen": 6082000,
|
931 |
+
"step": 520
|
932 |
+
},
|
933 |
+
{
|
934 |
+
"epoch": 0.2704094772083441,
|
935 |
+
"grad_norm": 0.5039123575147229,
|
936 |
+
"learning_rate": 9.704897844137673e-05,
|
937 |
+
"loss": 0.8842,
|
938 |
+
"num_input_tokens_seen": 6140480,
|
939 |
+
"step": 525
|
940 |
+
},
|
941 |
+
{
|
942 |
+
"epoch": 0.27298480556270927,
|
943 |
+
"grad_norm": 0.27836945453098666,
|
944 |
+
"learning_rate": 9.696612256073633e-05,
|
945 |
+
"loss": 0.8921,
|
946 |
+
"num_input_tokens_seen": 6198968,
|
947 |
+
"step": 530
|
948 |
+
},
|
949 |
+
{
|
950 |
+
"epoch": 0.2755601339170744,
|
951 |
+
"grad_norm": 0.22936338891946384,
|
952 |
+
"learning_rate": 9.688215592437039e-05,
|
953 |
+
"loss": 0.8979,
|
954 |
+
"num_input_tokens_seen": 6257464,
|
955 |
+
"step": 535
|
956 |
+
},
|
957 |
+
{
|
958 |
+
"epoch": 0.2781354622714396,
|
959 |
+
"grad_norm": 0.396486857609105,
|
960 |
+
"learning_rate": 9.679708051810221e-05,
|
961 |
+
"loss": 0.8951,
|
962 |
+
"num_input_tokens_seen": 6315944,
|
963 |
+
"step": 540
|
964 |
+
},
|
965 |
+
{
|
966 |
+
"epoch": 0.2807107906258048,
|
967 |
+
"grad_norm": 0.4751226662261396,
|
968 |
+
"learning_rate": 9.67108983539777e-05,
|
969 |
+
"loss": 0.9149,
|
970 |
+
"num_input_tokens_seen": 6374408,
|
971 |
+
"step": 545
|
972 |
+
},
|
973 |
+
{
|
974 |
+
"epoch": 0.28328611898017,
|
975 |
+
"grad_norm": 0.26829103885131056,
|
976 |
+
"learning_rate": 9.662361147021779e-05,
|
977 |
+
"loss": 0.9013,
|
978 |
+
"num_input_tokens_seen": 6432936,
|
979 |
+
"step": 550
|
980 |
+
},
|
981 |
+
{
|
982 |
+
"epoch": 0.28328611898017,
|
983 |
+
"eval_loss": 0.9001271724700928,
|
984 |
+
"eval_runtime": 16.9878,
|
985 |
+
"eval_samples_per_second": 3.532,
|
986 |
+
"eval_steps_per_second": 0.883,
|
987 |
+
"num_input_tokens_seen": 6432936,
|
988 |
+
"step": 550
|
989 |
}
|
990 |
],
|
991 |
"logging_steps": 5,
|
992 |
"max_steps": 3400,
|
993 |
+
"num_input_tokens_seen": 6432936,
|
994 |
"num_train_epochs": 2,
|
995 |
"save_steps": 50,
|
996 |
"stateful_callbacks": {
|
|
|
1005 |
"attributes": {}
|
1006 |
}
|
1007 |
},
|
1008 |
+
"total_flos": 361192817164288.0,
|
1009 |
"train_batch_size": 1,
|
1010 |
"trial_name": null,
|
1011 |
"trial_params": null
|