ben81828 commited on
Commit
453f891
1 Parent(s): 1b7780a

Training in progress, step 3400, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d8815c16681432a11b5ac188380ad3a07078a835a470d216013977c67742fdcc
3
  size 18516456
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e3dd7488301b3ff8268dcffb6f575eefe718b69eb21c58090d96141890a4fd7
3
  size 18516456
last-checkpoint/global_step3399/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5377ce9ec944960b3451a8668ed2930b7d08a3218dc6c36c7af91660799a564
3
+ size 27700976
last-checkpoint/global_step3399/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:642f46e60fe60bc51e4297db96d429c44bcbb7de7c17fa6343f398bc77a5f7b7
3
+ size 27700976
last-checkpoint/global_step3399/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0986456f1240e9a82a370b55c1b4f69c9fe5a39987a6cbd36d5459de46bc50a1
3
+ size 27700976
last-checkpoint/global_step3399/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:428d4ad8a46b8657df7181502a012cc6ed010aac1422fd4407a9f3b90c5f2245
3
+ size 27700976
last-checkpoint/global_step3399/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8be0604bac04683caeabc3351d36a81d128263368a7e48830a613677261d0f5
3
+ size 411571
last-checkpoint/global_step3399/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ceae8833e74f99b4451636773e39fc36c4f0ae5015d18206d304968d397d04a
3
+ size 411507
last-checkpoint/global_step3399/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4fa075ddbef52cce7c5a92e4246c0d6329b0fde68c384da28862b974f36cbb9c
3
+ size 411507
last-checkpoint/global_step3399/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ecfe6be46982c753e1f7606377f3951ed6043daec06db0db276ef338ee316b03
3
+ size 411507
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step3349
 
1
+ global_step3399
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:abe9163f042a56ab41ea5c2436dff084d8a4a6358e7f4cb1f18e04cb69810300
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1b33fbd97eb762e874f342b555135e0178fee9c63fa00114395986bd49c7d6c
3
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3c621ead8d06a0f1d00c5217cc2cfdc90c8c62fa1cb0da0986461ec51fd1766b
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ea9cac9af94198fada9ef3d4fae4312ce5ac99a95501a7745aeb7f91fcb6b08
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eff8dbefa4ff395a5376144d756cbe824baaab98a892f200d30b7916c24d27cf
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07e9f829a7622427f225a6c2d17e591979f9a3ce0b403f5f12527ef6cddec21d
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dd67eb847a256b4f0de5857c5e8a43697485d1a0f6032004d0bc19149d77879c
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db61901b1f811673403412b38c3433989e32dba9ce91026522943ae0e96f1d82
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:df3be552cf2524f9ece2b6a286f0ce246d18d14d42f9b8c771a555e051bcee33
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5fc2897164e26dbf189cf39613143884cb612b3f808a6a18c481ece64d73bc7
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.6319106221199036,
3
  "best_model_checkpoint": "saves/CADICA_qwenvl_stenosis_classily_scale4_frozenVision/lora/sft/checkpoint-1600",
4
- "epoch": 1.725212464589235,
5
  "eval_steps": 50,
6
- "global_step": 3350,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -5970,11 +5970,100 @@
5970
  "eval_steps_per_second": 0.93,
5971
  "num_input_tokens_seen": 39175888,
5972
  "step": 3350
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5973
  }
5974
  ],
5975
  "logging_steps": 5,
5976
  "max_steps": 3400,
5977
- "num_input_tokens_seen": 39175888,
5978
  "num_train_epochs": 2,
5979
  "save_steps": 50,
5980
  "stateful_callbacks": {
@@ -5984,12 +6073,12 @@
5984
  "should_evaluate": false,
5985
  "should_log": false,
5986
  "should_save": true,
5987
- "should_training_stop": false
5988
  },
5989
  "attributes": {}
5990
  }
5991
  },
5992
- "total_flos": 2199919870083072.0,
5993
  "train_batch_size": 1,
5994
  "trial_name": null,
5995
  "trial_params": null
 
1
  {
2
  "best_metric": 0.6319106221199036,
3
  "best_model_checkpoint": "saves/CADICA_qwenvl_stenosis_classily_scale4_frozenVision/lora/sft/checkpoint-1600",
4
+ "epoch": 1.750965748132887,
5
  "eval_steps": 50,
6
+ "global_step": 3400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
5970
  "eval_steps_per_second": 0.93,
5971
  "num_input_tokens_seen": 39175888,
5972
  "step": 3350
5973
+ },
5974
+ {
5975
+ "epoch": 1.7277877929436003,
5976
+ "grad_norm": 10.968051828666288,
5977
+ "learning_rate": 4.788399817602929e-08,
5978
+ "loss": 0.2565,
5979
+ "num_input_tokens_seen": 39234336,
5980
+ "step": 3355
5981
+ },
5982
+ {
5983
+ "epoch": 1.7303631212979655,
5984
+ "grad_norm": 5.1159559645491335,
5985
+ "learning_rate": 3.7835537837338506e-08,
5986
+ "loss": 0.2762,
5987
+ "num_input_tokens_seen": 39292800,
5988
+ "step": 3360
5989
+ },
5990
+ {
5991
+ "epoch": 1.7329384496523308,
5992
+ "grad_norm": 6.735859744015271,
5993
+ "learning_rate": 2.8968690057051828e-08,
5994
+ "loss": 0.2196,
5995
+ "num_input_tokens_seen": 39351272,
5996
+ "step": 3365
5997
+ },
5998
+ {
5999
+ "epoch": 1.7355137780066958,
6000
+ "grad_norm": 3.989003741597172,
6001
+ "learning_rate": 2.128366453743591e-08,
6002
+ "loss": 0.2482,
6003
+ "num_input_tokens_seen": 39409736,
6004
+ "step": 3370
6005
+ },
6006
+ {
6007
+ "epoch": 1.738089106361061,
6008
+ "grad_norm": 5.083412307953648,
6009
+ "learning_rate": 1.4780643030476438e-08,
6010
+ "loss": 0.2778,
6011
+ "num_input_tokens_seen": 39468176,
6012
+ "step": 3375
6013
+ },
6014
+ {
6015
+ "epoch": 1.740664434715426,
6016
+ "grad_norm": 7.4306605849577565,
6017
+ "learning_rate": 9.459779333587104e-09,
6018
+ "loss": 0.2048,
6019
+ "num_input_tokens_seen": 39526688,
6020
+ "step": 3380
6021
+ },
6022
+ {
6023
+ "epoch": 1.7432397630697913,
6024
+ "grad_norm": 4.202839419581782,
6025
+ "learning_rate": 5.3211992859791835e-09,
6026
+ "loss": 0.2296,
6027
+ "num_input_tokens_seen": 39585152,
6028
+ "step": 3385
6029
+ },
6030
+ {
6031
+ "epoch": 1.7458150914241566,
6032
+ "grad_norm": 7.909317855624412,
6033
+ "learning_rate": 2.3650007656805806e-09,
6034
+ "loss": 0.2713,
6035
+ "num_input_tokens_seen": 39643640,
6036
+ "step": 3390
6037
+ },
6038
+ {
6039
+ "epoch": 1.7483904197785218,
6040
+ "grad_norm": 7.880795429819755,
6041
+ "learning_rate": 5.912536872321184e-10,
6042
+ "loss": 0.2964,
6043
+ "num_input_tokens_seen": 39702144,
6044
+ "step": 3395
6045
+ },
6046
+ {
6047
+ "epoch": 1.750965748132887,
6048
+ "grad_norm": 4.00234080349809,
6049
+ "learning_rate": 0.0,
6050
+ "loss": 0.1797,
6051
+ "num_input_tokens_seen": 39760664,
6052
+ "step": 3400
6053
+ },
6054
+ {
6055
+ "epoch": 1.750965748132887,
6056
+ "eval_loss": 0.8603056073188782,
6057
+ "eval_runtime": 16.2474,
6058
+ "eval_samples_per_second": 3.693,
6059
+ "eval_steps_per_second": 0.923,
6060
+ "num_input_tokens_seen": 39760664,
6061
+ "step": 3400
6062
  }
6063
  ],
6064
  "logging_steps": 5,
6065
  "max_steps": 3400,
6066
+ "num_input_tokens_seen": 39760664,
6067
  "num_train_epochs": 2,
6068
  "save_steps": 50,
6069
  "stateful_callbacks": {
 
6073
  "should_evaluate": false,
6074
  "should_log": false,
6075
  "should_save": true,
6076
+ "should_training_stop": true
6077
  },
6078
  "attributes": {}
6079
  }
6080
  },
6081
+ "total_flos": 2232757993603072.0,
6082
  "train_batch_size": 1,
6083
  "trial_name": null,
6084
  "trial_params": null