ben81828 committed on
Commit 04b9588 · verified · 1 Parent(s): f46f432

Training in progress, step 100, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ddfe67a44369497ba941790aaedb13a9e576b7a570ced2e459401c6bd22bed7b
+oid sha256:87e31f39627ba52b48359d084626bc6f99614365bb9537249c642ef0ff5fda15
 size 29034840
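
Every binary in this commit is tracked with Git LFS, so the diff only shows the pointer file changing: the `version` line, the new blob's `oid sha256`, and its `size` in bytes. As a quick sanity check after pulling the actual blobs, one can hash a downloaded file and compare it against the pointer. A minimal sketch (the local path is illustrative, values copied from the pointer above):

```python
import hashlib
from pathlib import Path

def sha256_of(path: Path, chunk_size: int = 1 << 20) -> str:
    """Stream the file so large checkpoints never need to fit in memory."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

# Expected values taken from the LFS pointer diff above.
expected_oid = "87e31f39627ba52b48359d084626bc6f99614365bb9537249c642ef0ff5fda15"
expected_size = 29034840

path = Path("last-checkpoint/adapter_model.safetensors")  # illustrative local clone path
assert path.stat().st_size == expected_size, "size mismatch with LFS pointer"
assert sha256_of(path) == expected_oid, "sha256 mismatch with LFS pointer"
print("adapter_model.safetensors matches its LFS pointer")
```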
last-checkpoint/global_step100/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:92a53c404163e85ff983692ad570f400e43c2847cdd0d069c483a732b36be8d4
+oid sha256:5c39bc4e3912b020a01c4ad7ed4f21389e5bd74f34aabdc6a1535d7762106341
 size 43429616
last-checkpoint/global_step100/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:46a03925af6826bf6c00bffcc1584a6359475255347f1f068ef743f7e12c6c9f
+oid sha256:aee732318fb84718d95612348f082dbf37a877f3046b0a67d9dfa1863e6c1ed6
 size 43429616
last-checkpoint/global_step100/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2afe739241a4b9f8fe68c79e7e0a64eb9d2b03897a49eafff1469f818907a43a
+oid sha256:12795b9b3aadf57e342191c9336b3e3f7a73f7a535037e521e2730ea93794f3b
 size 43429616
last-checkpoint/global_step100/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d104230696d55717acd3c2b8a4d72163b534dabb303f6e09895a2f79f4e79a0e
+oid sha256:f3a9d632fcd9f887ae5bfc2bda7691d7e9e567c4b151964d4183789e16348bda
 size 43429616
last-checkpoint/latest CHANGED
@@ -1 +1 @@
-global_step50
+global_step100
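
The `latest` file is the tag that DeepSpeed-style checkpoints use to locate the newest ZeRO state inside the checkpoint directory; after this commit it points at `global_step100`, the folder holding the per-rank bf16 optimizer shards updated above. A small sketch of how a resume script might resolve it, using only the directory layout visible in this commit (no framework calls, local path illustrative):

```python
from pathlib import Path

checkpoint_dir = Path("last-checkpoint")            # illustrative local clone path
tag = (checkpoint_dir / "latest").read_text().strip()  # -> "global_step100"

# One bf16 ZeRO optimizer shard per data-parallel rank, as listed in this commit.
shards = sorted((checkpoint_dir / tag).glob("bf16_zero_pp_rank_*_mp_rank_00_optim_states.pt"))
print(tag, [p.name for p in shards])
```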
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bbe0d720c4c75a6a04213fa3b64bacbe794718a53e2b56ebb67a1a795014dfad
+oid sha256:308f94f9a5c24e1bad5c393d56ae7af7782600f4e791d9c6ac35b22fff2105b6
 size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:72452d3138d0ca2ff89429e3294a834ae7a68e8596fc757735ca56ae52509d57
+oid sha256:b056f3c23cb32dc77a2ec9e7651e0b64e4440e21f0fdf969b86bfc56a1cbdf06
 size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f36e306fb8ebcf53a167bfd6c9af74db410a269ada1e619e3e816f5269543b9d
+oid sha256:f3f8a05714bc528f4885a2816181652f2303b3e8150f89b56aaee6bec56aa520
 size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bb47ce0c6f815a6f8302b0e3819b4c2315ca71dae3138d97fdceb765cdd0a039
+oid sha256:4f755bd3c330281961e5c03af9d10ce8c1e1678619d384f6f1fd5fd7dce2ff50
 size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cca48e2d3844a29a683fd85ffce1b801b7cef50edd6cbd3fd12220cfd7592690
+oid sha256:f68f3697ebde6b4b610ed5967d76917d2bfd154b81c4fd80c169590521cb7d60
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 0.889969527721405,
   "best_model_checkpoint": "saves/CADICA_qwenvl_direction_then_DetectAndClassify_scale4/lora/sft/checkpoint-50",
-  "epoch": 0.022172949002217297,
+  "epoch": 0.04434589800443459,
   "eval_steps": 50,
-  "global_step": 50,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -96,11 +96,100 @@
       "eval_steps_per_second": 0.314,
       "num_input_tokens_seen": 521992,
       "step": 50
+    },
+    {
+      "epoch": 0.024390243902439025,
+      "grad_norm": 0.9805700614384589,
+      "learning_rate": 9.166666666666667e-05,
+      "loss": 0.9117,
+      "num_input_tokens_seen": 574288,
+      "step": 55
+    },
+    {
+      "epoch": 0.026607538802660754,
+      "grad_norm": 0.9233792774186961,
+      "learning_rate": 0.0001,
+      "loss": 0.8536,
+      "num_input_tokens_seen": 626960,
+      "step": 60
+    },
+    {
+      "epoch": 0.028824833702882482,
+      "grad_norm": 0.6897618492525014,
+      "learning_rate": 9.999525361252996e-05,
+      "loss": 0.8525,
+      "num_input_tokens_seen": 678248,
+      "step": 65
+    },
+    {
+      "epoch": 0.031042128603104215,
+      "grad_norm": 0.4751822186720059,
+      "learning_rate": 9.998101535124758e-05,
+      "loss": 0.8411,
+      "num_input_tokens_seen": 730376,
+      "step": 70
+    },
+    {
+      "epoch": 0.03325942350332594,
+      "grad_norm": 0.8184941608670437,
+      "learning_rate": 9.995728791936504e-05,
+      "loss": 0.8105,
+      "num_input_tokens_seen": 781648,
+      "step": 75
+    },
+    {
+      "epoch": 0.03547671840354767,
+      "grad_norm": 0.930967493911239,
+      "learning_rate": 9.992407582166581e-05,
+      "loss": 0.8383,
+      "num_input_tokens_seen": 833096,
+      "step": 80
+    },
+    {
+      "epoch": 0.037694013303769404,
+      "grad_norm": 0.9048736490092079,
+      "learning_rate": 9.988138536364922e-05,
+      "loss": 0.8133,
+      "num_input_tokens_seen": 885648,
+      "step": 85
+    },
+    {
+      "epoch": 0.03991130820399113,
+      "grad_norm": 1.4017690802266505,
+      "learning_rate": 9.98292246503335e-05,
+      "loss": 0.8217,
+      "num_input_tokens_seen": 938208,
+      "step": 90
+    },
+    {
+      "epoch": 0.04212860310421286,
+      "grad_norm": 1.2230184844510747,
+      "learning_rate": 9.976760358471686e-05,
+      "loss": 0.7601,
+      "num_input_tokens_seen": 989992,
+      "step": 95
+    },
+    {
+      "epoch": 0.04434589800443459,
+      "grad_norm": 1.108668630058659,
+      "learning_rate": 9.969653386589748e-05,
+      "loss": 0.7938,
+      "num_input_tokens_seen": 1042120,
+      "step": 100
+    },
+    {
+      "epoch": 0.04434589800443459,
+      "eval_loss": 0.8915936946868896,
+      "eval_runtime": 19.4403,
+      "eval_samples_per_second": 3.086,
+      "eval_steps_per_second": 0.772,
+      "num_input_tokens_seen": 1042120,
+      "step": 100
     }
   ],
   "logging_steps": 5,
   "max_steps": 1200,
-  "num_input_tokens_seen": 521992,
+  "num_input_tokens_seen": 1042120,
   "num_train_epochs": 1,
   "save_steps": 50,
   "stateful_callbacks": {
@@ -115,7 +204,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 34435736010752.0,
+  "total_flos": 68727405543424.0,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null