ben81828 committed on
Commit
41a1a22
·
verified ·
1 Parent(s): 78271a3

Training in progress, step 2800, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:71879fd91931f28264e3a813a60350149ee278572f6af60c14a93354f4b2e458
3
  size 29034840
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a4775218e4d9519c7e7224a3c5f1838a68e0cfaca11bd2bcf9f97934c96fd4c
3
  size 29034840
last-checkpoint/global_step2800/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d2f0622c084fc6ee7213ea6ef083d7fbe78d41428bd7eef2426a18ec56926f83
3
+ size 43429616
last-checkpoint/global_step2800/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b1bc89c27e182046c49b0a4eec741ec6d4abcf64fc546a7dbaf72c029b7f25c
3
+ size 43429616
last-checkpoint/global_step2800/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7300b60795f90634f69d1acd5bb6001a19025d8375de7ba93002af85968edc6a
3
+ size 43429616
last-checkpoint/global_step2800/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c485cc1fdb8e82d0fcbedfb3ea98649e5ecc11f53d2ab95dbb4149bbfa6bbdcc
3
+ size 43429616
last-checkpoint/global_step2800/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f997960f7f186166b975c90300d6bce3c01ae9ed696d6c6962cf37c6aef47b58
3
+ size 637299
last-checkpoint/global_step2800/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f80a1d396d880ad0020e4df1d51ad7be0886c4ab66c722875e1e8ab089c2b229
3
+ size 637171
last-checkpoint/global_step2800/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ae9598d90f975eb10de5489e7c1fb42af3d9ba46a3f4ce3e6d95f228e07c6be
3
+ size 637171
last-checkpoint/global_step2800/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:995e707df2b78c4fa80e869409936596ec459f4c04a98cb91524bd5a798c99f6
3
+ size 637171
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step2750
 
1
+ global_step2800
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5d8d3c7739f9787ea797b86ff1b3a51f9e68197835ba3178915a8a77558f67fc
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49db5a9fd0c84d580c671e52905ebeffc155b36537e76ff966d2e82906708999
3
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a22a57799bc43e59db67d9a787ed73040020c5f35990602033f4dab1318787d7
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8ca224562d8d97aaa131b3516288bb99f68d7dcf62170494326662bda0bb206
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:29a624b936b77a04d6bfb6940acdd65a710bf39452e419e7ddb5c40fb2261072
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56e86a11c89dba78d60e1b2a1855a651b90a5a22ef131ce65d26af83668c154e
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3a79306817d4440cd621149537e8cf216b60f847fc6f9531a6147426aa02bb07
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3dd63019c923e9692431619aced46b91aaf3fd22e1c22ec0a64347f2fe635a0e
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fc99fc5a48a169bebd6bda86672afa6c721f544602a3586f7782ae7070075fc7
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0dc59065f970fd13c57a16e5aa2c2ec0e5dc6ba16189267486fbd8cc465a6fd
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.1869634985923767,
3
  "best_model_checkpoint": "saves/CADICA_qwenvl_direction_then_DetectAndClassify_scale6/lora/sft/checkpoint-2350",
4
- "epoch": 0.8124076809453471,
5
  "eval_steps": 50,
6
- "global_step": 2750,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4902,11 +4902,100 @@
4902
  "eval_steps_per_second": 0.778,
4903
  "num_input_tokens_seen": 28561248,
4904
  "step": 2750
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4905
  }
4906
  ],
4907
  "logging_steps": 5,
4908
  "max_steps": 6770,
4909
- "num_input_tokens_seen": 28561248,
4910
  "num_train_epochs": 2,
4911
  "save_steps": 50,
4912
  "stateful_callbacks": {
@@ -4921,7 +5010,7 @@
4921
  "attributes": {}
4922
  }
4923
  },
4924
- "total_flos": 1884238220361728.0,
4925
  "train_batch_size": 1,
4926
  "trial_name": null,
4927
  "trial_params": null
 
1
  {
2
  "best_metric": 0.1869634985923767,
3
  "best_model_checkpoint": "saves/CADICA_qwenvl_direction_then_DetectAndClassify_scale6/lora/sft/checkpoint-2350",
4
+ "epoch": 0.827178729689808,
5
  "eval_steps": 50,
6
+ "global_step": 2800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4902
  "eval_steps_per_second": 0.778,
4903
  "num_input_tokens_seen": 28561248,
4904
  "step": 2750
4905
+ },
4906
+ {
4907
+ "epoch": 0.8138847858197932,
4908
+ "grad_norm": 1.1919186743284782,
4909
+ "learning_rate": 6.903540125961965e-05,
4910
+ "loss": 0.225,
4911
+ "num_input_tokens_seen": 28613120,
4912
+ "step": 2755
4913
+ },
4914
+ {
4915
+ "epoch": 0.8153618906942393,
4916
+ "grad_norm": 1.4599715179768002,
4917
+ "learning_rate": 6.892241440886377e-05,
4918
+ "loss": 0.2365,
4919
+ "num_input_tokens_seen": 28664864,
4920
+ "step": 2760
4921
+ },
4922
+ {
4923
+ "epoch": 0.8168389955686853,
4924
+ "grad_norm": 1.2279132312954155,
4925
+ "learning_rate": 6.880931466717327e-05,
4926
+ "loss": 0.2386,
4927
+ "num_input_tokens_seen": 28716896,
4928
+ "step": 2765
4929
+ },
4930
+ {
4931
+ "epoch": 0.8183161004431314,
4932
+ "grad_norm": 22.206631253466607,
4933
+ "learning_rate": 6.86961027093001e-05,
4934
+ "loss": 0.2358,
4935
+ "num_input_tokens_seen": 28769528,
4936
+ "step": 2770
4937
+ },
4938
+ {
4939
+ "epoch": 0.8197932053175776,
4940
+ "grad_norm": 6.630701043823761,
4941
+ "learning_rate": 6.858277921066568e-05,
4942
+ "loss": 0.2844,
4943
+ "num_input_tokens_seen": 28821304,
4944
+ "step": 2775
4945
+ },
4946
+ {
4947
+ "epoch": 0.8212703101920237,
4948
+ "grad_norm": 0.7473512414072709,
4949
+ "learning_rate": 6.846934484735686e-05,
4950
+ "loss": 0.1867,
4951
+ "num_input_tokens_seen": 28872712,
4952
+ "step": 2780
4953
+ },
4954
+ {
4955
+ "epoch": 0.8227474150664698,
4956
+ "grad_norm": 1.1277045436573916,
4957
+ "learning_rate": 6.83558002961219e-05,
4958
+ "loss": 0.2184,
4959
+ "num_input_tokens_seen": 28924272,
4960
+ "step": 2785
4961
+ },
4962
+ {
4963
+ "epoch": 0.8242245199409158,
4964
+ "grad_norm": 14.609958116422174,
4965
+ "learning_rate": 6.824214623436644e-05,
4966
+ "loss": 0.1938,
4967
+ "num_input_tokens_seen": 28976352,
4968
+ "step": 2790
4969
+ },
4970
+ {
4971
+ "epoch": 0.8257016248153619,
4972
+ "grad_norm": 1.2950871631178849,
4973
+ "learning_rate": 6.812838334014951e-05,
4974
+ "loss": 0.2046,
4975
+ "num_input_tokens_seen": 29028344,
4976
+ "step": 2795
4977
+ },
4978
+ {
4979
+ "epoch": 0.827178729689808,
4980
+ "grad_norm": 1.6899801995875487,
4981
+ "learning_rate": 6.801451229217938e-05,
4982
+ "loss": 0.2507,
4983
+ "num_input_tokens_seen": 29079576,
4984
+ "step": 2800
4985
+ },
4986
+ {
4987
+ "epoch": 0.827178729689808,
4988
+ "eval_loss": 0.32233569025993347,
4989
+ "eval_runtime": 19.2788,
4990
+ "eval_samples_per_second": 3.112,
4991
+ "eval_steps_per_second": 0.778,
4992
+ "num_input_tokens_seen": 29079576,
4993
+ "step": 2800
4994
  }
4995
  ],
4996
  "logging_steps": 5,
4997
  "max_steps": 6770,
4998
+ "num_input_tokens_seen": 29079576,
4999
  "num_train_epochs": 2,
5000
  "save_steps": 50,
5001
  "stateful_callbacks": {
 
5010
  "attributes": {}
5011
  }
5012
  },
5013
+ "total_flos": 1918391455318016.0,
5014
  "train_batch_size": 1,
5015
  "trial_name": null,
5016
  "trial_params": null