ben81828 committed on
Commit
b794285
·
verified ·
1 Parent(s): d725f0d

Training in progress, step 2850, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5d8105e08b44734413954313ee3f59582f2323ad431f91a8d42d31e834f41c4c
3
  size 18516456
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1404e60fabb5103dbe434a7e9d7d0543eaa50c6cb487e2d805559319b22760ea
3
  size 18516456
last-checkpoint/global_step2849/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0320fc7e79afa52871fad11b4951ff226723ea958a8c75baa66260e01590d00
3
+ size 27700976
last-checkpoint/global_step2849/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:493d98c40644d4613b7287b8a8e89c29c6bcafd90c3423ade06378a38611b230
3
+ size 27700976
last-checkpoint/global_step2849/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5da8ec6e6b06c57bc2017e5fd74916f5c7af2806159816995dbd17d2cba41093
3
+ size 27700976
last-checkpoint/global_step2849/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40c5c17e53354433c9101c2204b9529ff9ca60c9fa6419f900cd02cf5bcf7e14
3
+ size 27700976
last-checkpoint/global_step2849/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c59124c63278e0ddcdd00d2485f23837eb1e4a24778f31cd0a1f29b7f5ebaf2
3
+ size 411571
last-checkpoint/global_step2849/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0fcc75c180d57d0d5cf6ee78c7397542385f0b34c59f3995878fbc1639b1c23
3
+ size 411507
last-checkpoint/global_step2849/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4aee38319c36ef0c2fddef1bce787e579f51b9dddc8aade89ea73123165b1ea4
3
+ size 411507
last-checkpoint/global_step2849/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a435cae583755a87893c919608846d626e54fee7180a95a616df5408f65cbf7d
3
+ size 411507
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step2799
 
1
+ global_step2849
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ff8dba2341c0517760edfde50521977f02a5bd982ffd3bc03de6109439c4f478
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce92cea831a04716b4b472f1dad1cc986b2021dee9aac057217f5d455b27ec42
3
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d2bf831df9fbade9ac2a8db79798bc2a7b1afb85a78a6e463ec7a7db4acc0f8e
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3cddb73bbdf0f6f6a2c3182d70f7ad5d587353b164c08dd4f383b940d6b61e4e
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8602ff0a0fa366d46b61c0ef2b23ce468387898cf2bc1027e5450de73ddf647f
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b24b508e466beb446d37377d2a04757d3bc2b4230de3ac56b25a65d7753a74c1
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4bb51d675cf23603b1b765cd645f53d6b66ddb104d56d48674e9c798e086f696
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4c6a18a7de8b25b21673ba2ff7efbaaae00ec8c453c7975b467c1df87b87022
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:61a7c605cf80a46d2e0c661d5469c16671b681f268e3ecd5d1d64188653910db
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28cb8f1d536b1fc196e353ec37fd07d574fab0a464ddd8b31c73c59dcab3c03b
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.6319106221199036,
3
  "best_model_checkpoint": "saves/CADICA_qwenvl_stenosis_classily_scale4_frozenVision/lora/sft/checkpoint-1600",
4
- "epoch": 1.4419263456090652,
5
  "eval_steps": 50,
6
- "global_step": 2800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4991,11 +4991,100 @@
4991
  "eval_steps_per_second": 0.931,
4992
  "num_input_tokens_seen": 32743032,
4993
  "step": 2800
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4994
  }
4995
  ],
4996
  "logging_steps": 5,
4997
  "max_steps": 3400,
4998
- "num_input_tokens_seen": 32743032,
4999
  "num_train_epochs": 2,
5000
  "save_steps": 50,
5001
  "stateful_callbacks": {
@@ -5010,7 +5099,7 @@
5010
  "attributes": {}
5011
  }
5012
  },
5013
- "total_flos": 1838675721715712.0,
5014
  "train_batch_size": 1,
5015
  "trial_name": null,
5016
  "trial_params": null
 
1
  {
2
  "best_metric": 0.6319106221199036,
3
  "best_model_checkpoint": "saves/CADICA_qwenvl_stenosis_classily_scale4_frozenVision/lora/sft/checkpoint-1600",
4
+ "epoch": 1.467679629152717,
5
  "eval_steps": 50,
6
+ "global_step": 2850,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4991
  "eval_steps_per_second": 0.931,
4992
  "num_input_tokens_seen": 32743032,
4993
  "step": 2800
4994
+ },
4995
+ {
4996
+ "epoch": 1.4445016739634302,
4997
+ "grad_norm": 4.932628514942533,
4998
+ "learning_rate": 8.141676086873572e-06,
4999
+ "loss": 0.2974,
5000
+ "num_input_tokens_seen": 32801504,
5001
+ "step": 2805
5002
+ },
5003
+ {
5004
+ "epoch": 1.4470770023177955,
5005
+ "grad_norm": 8.764444587690557,
5006
+ "learning_rate": 8.009177137203794e-06,
5007
+ "loss": 0.2849,
5008
+ "num_input_tokens_seen": 32860032,
5009
+ "step": 2810
5010
+ },
5011
+ {
5012
+ "epoch": 1.4496523306721607,
5013
+ "grad_norm": 5.502098759051231,
5014
+ "learning_rate": 7.877671276687898e-06,
5015
+ "loss": 0.3024,
5016
+ "num_input_tokens_seen": 32918472,
5017
+ "step": 2815
5018
+ },
5019
+ {
5020
+ "epoch": 1.452227659026526,
5021
+ "grad_norm": 3.2634043608450183,
5022
+ "learning_rate": 7.747161615458902e-06,
5023
+ "loss": 0.2565,
5024
+ "num_input_tokens_seen": 32976944,
5025
+ "step": 2820
5026
+ },
5027
+ {
5028
+ "epoch": 1.4548029873808912,
5029
+ "grad_norm": 4.852977750360098,
5030
+ "learning_rate": 7.617651240089546e-06,
5031
+ "loss": 0.2473,
5032
+ "num_input_tokens_seen": 33035424,
5033
+ "step": 2825
5034
+ },
5035
+ {
5036
+ "epoch": 1.4573783157352562,
5037
+ "grad_norm": 8.667293936674204,
5038
+ "learning_rate": 7.489143213519301e-06,
5039
+ "loss": 0.3118,
5040
+ "num_input_tokens_seen": 33093880,
5041
+ "step": 2830
5042
+ },
5043
+ {
5044
+ "epoch": 1.4599536440896215,
5045
+ "grad_norm": 9.253351843058615,
5046
+ "learning_rate": 7.361640574981937e-06,
5047
+ "loss": 0.2593,
5048
+ "num_input_tokens_seen": 33152328,
5049
+ "step": 2835
5050
+ },
5051
+ {
5052
+ "epoch": 1.4625289724439865,
5053
+ "grad_norm": 6.811131820051524,
5054
+ "learning_rate": 7.2351463399336735e-06,
5055
+ "loss": 0.284,
5056
+ "num_input_tokens_seen": 33210816,
5057
+ "step": 2840
5058
+ },
5059
+ {
5060
+ "epoch": 1.4651043007983517,
5061
+ "grad_norm": 4.086720732934785,
5062
+ "learning_rate": 7.109663499981834e-06,
5063
+ "loss": 0.2671,
5064
+ "num_input_tokens_seen": 33269320,
5065
+ "step": 2845
5066
+ },
5067
+ {
5068
+ "epoch": 1.467679629152717,
5069
+ "grad_norm": 9.463519299706055,
5070
+ "learning_rate": 6.985195022814067e-06,
5071
+ "loss": 0.2848,
5072
+ "num_input_tokens_seen": 33327720,
5073
+ "step": 2850
5074
+ },
5075
+ {
5076
+ "epoch": 1.467679629152717,
5077
+ "eval_loss": 0.8045337796211243,
5078
+ "eval_runtime": 15.9996,
5079
+ "eval_samples_per_second": 3.75,
5080
+ "eval_steps_per_second": 0.938,
5081
+ "num_input_tokens_seen": 33327720,
5082
+ "step": 2850
5083
  }
5084
  ],
5085
  "logging_steps": 5,
5086
  "max_steps": 3400,
5087
+ "num_input_tokens_seen": 33327720,
5088
  "num_train_epochs": 2,
5089
  "save_steps": 50,
5090
  "stateful_callbacks": {
 
5099
  "attributes": {}
5100
  }
5101
  },
5102
+ "total_flos": 1871508641415168.0,
5103
  "train_batch_size": 1,
5104
  "trial_name": null,
5105
  "trial_params": null