ben81828 commited on
Commit
5a96221
·
verified ·
1 Parent(s): 9b25218

Training in progress, step 100, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b0fb13d10a5153c42be30667ec413318749436b697a6a62d3dd8e7e8cd181234
3
  size 29034840
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54617ace9be9da2e4e57ae55a0f5329011112b5c8868c9fffa532ea69e4c2d98
3
  size 29034840
last-checkpoint/global_step100/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ddc2817afdc9225f515b78a9eb8a53a0f3315e3cdd9dea905c90813f2839bdd
3
+ size 43429616
last-checkpoint/global_step100/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1f3250f9636418835e6fb0cb6591fe5d43889ab3ec1d5034336bd3337377ae7
3
+ size 43429616
last-checkpoint/global_step100/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4cce653c94d71ed469a09c7d917fa914f90a11a71b34f8eeffd05e35fc4c4515
3
+ size 43429616
last-checkpoint/global_step100/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4098ce82fff11005a17eecd3482bcc1b0d125c2c4454f4f2c5da52a26a9e46e4
3
+ size 43429616
last-checkpoint/global_step100/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5d10e19daf23d9a5519105cd8f02b133fd832afdbceaae32bd68ecfb18273f2
3
+ size 637299
last-checkpoint/global_step100/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e70d7ddedc58dfdc0ec49a115d1b1e0af3fb290d2785ce1448b9276a2c88d36
3
+ size 637171
last-checkpoint/global_step100/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ea9021c0781ce4f2bf1279fa5cd2b7bb74cadac2cd292ff41e5bda0c40f8f90
3
+ size 637171
last-checkpoint/global_step100/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db256fcff0519e97f39b24fc7b31436bff588b11e7bcc1b1e3b7bba4345a7d67
3
+ size 637171
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step50
 
1
+ global_step100
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bbe0d720c4c75a6a04213fa3b64bacbe794718a53e2b56ebb67a1a795014dfad
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:308f94f9a5c24e1bad5c393d56ae7af7782600f4e791d9c6ac35b22fff2105b6
3
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:72452d3138d0ca2ff89429e3294a834ae7a68e8596fc757735ca56ae52509d57
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b056f3c23cb32dc77a2ec9e7651e0b64e4440e21f0fdf969b86bfc56a1cbdf06
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f36e306fb8ebcf53a167bfd6c9af74db410a269ada1e619e3e816f5269543b9d
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3f8a05714bc528f4885a2816181652f2303b3e8150f89b56aaee6bec56aa520
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bb47ce0c6f815a6f8302b0e3819b4c2315ca71dae3138d97fdceb765cdd0a039
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f755bd3c330281961e5c03af9d10ce8c1e1678619d384f6f1fd5fd7dce2ff50
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3ab82860e8f4753962498aa91f545ddf5d94711cc852b62536116736b2908eac
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e451a9e086b06d7c667be8442b2115f5c088953bade0b625e61f2ce5c7fd404
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.33834776282310486,
3
- "best_model_checkpoint": "saves/CADICA_qwenvl_direction_scale4/lora/sft/checkpoint-50",
4
- "epoch": 0.025753283543651816,
5
  "eval_steps": 50,
6
- "global_step": 50,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -96,11 +96,100 @@
96
  "eval_steps_per_second": 0.316,
97
  "num_input_tokens_seen": 499200,
98
  "step": 50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
  }
100
  ],
101
  "logging_steps": 5,
102
  "max_steps": 3400,
103
- "num_input_tokens_seen": 499200,
104
  "num_train_epochs": 2,
105
  "save_steps": 50,
106
  "stateful_callbacks": {
@@ -115,7 +204,7 @@
115
  "attributes": {}
116
  }
117
  },
118
- "total_flos": 32884807958528.0,
119
  "train_batch_size": 1,
120
  "trial_name": null,
121
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.18663176894187927,
3
+ "best_model_checkpoint": "saves/CADICA_qwenvl_direction_scale4/lora/sft/checkpoint-100",
4
+ "epoch": 0.05150656708730363,
5
  "eval_steps": 50,
6
+ "global_step": 100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
96
  "eval_steps_per_second": 0.316,
97
  "num_input_tokens_seen": 499200,
98
  "step": 50
99
+ },
100
+ {
101
+ "epoch": 0.028328611898016998,
102
+ "grad_norm": 1.4016556961184263,
103
+ "learning_rate": 3.235294117647059e-05,
104
+ "loss": 0.3182,
105
+ "num_input_tokens_seen": 549120,
106
+ "step": 55
107
+ },
108
+ {
109
+ "epoch": 0.03090394025238218,
110
+ "grad_norm": 0.6437613769459606,
111
+ "learning_rate": 3.529411764705883e-05,
112
+ "loss": 0.3294,
113
+ "num_input_tokens_seen": 599040,
114
+ "step": 60
115
+ },
116
+ {
117
+ "epoch": 0.03347926860674736,
118
+ "grad_norm": 0.7389008951321312,
119
+ "learning_rate": 3.8235294117647055e-05,
120
+ "loss": 0.3097,
121
+ "num_input_tokens_seen": 648960,
122
+ "step": 65
123
+ },
124
+ {
125
+ "epoch": 0.036054596961112545,
126
+ "grad_norm": 0.771553860801019,
127
+ "learning_rate": 4.11764705882353e-05,
128
+ "loss": 0.3008,
129
+ "num_input_tokens_seen": 698880,
130
+ "step": 70
131
+ },
132
+ {
133
+ "epoch": 0.03862992531547772,
134
+ "grad_norm": 0.6965369148334918,
135
+ "learning_rate": 4.411764705882353e-05,
136
+ "loss": 0.3278,
137
+ "num_input_tokens_seen": 748800,
138
+ "step": 75
139
+ },
140
+ {
141
+ "epoch": 0.04120525366984291,
142
+ "grad_norm": 0.912943461315541,
143
+ "learning_rate": 4.705882352941177e-05,
144
+ "loss": 0.3074,
145
+ "num_input_tokens_seen": 798720,
146
+ "step": 80
147
+ },
148
+ {
149
+ "epoch": 0.043780582024208085,
150
+ "grad_norm": 0.8407481737577445,
151
+ "learning_rate": 5e-05,
152
+ "loss": 0.3423,
153
+ "num_input_tokens_seen": 848640,
154
+ "step": 85
155
+ },
156
+ {
157
+ "epoch": 0.04635591037857327,
158
+ "grad_norm": 0.9112879058417015,
159
+ "learning_rate": 5.294117647058824e-05,
160
+ "loss": 0.3008,
161
+ "num_input_tokens_seen": 898560,
162
+ "step": 90
163
+ },
164
+ {
165
+ "epoch": 0.04893123873293845,
166
+ "grad_norm": 2.391489040464162,
167
+ "learning_rate": 5.588235294117647e-05,
168
+ "loss": 0.2815,
169
+ "num_input_tokens_seen": 948480,
170
+ "step": 95
171
+ },
172
+ {
173
+ "epoch": 0.05150656708730363,
174
+ "grad_norm": 2.155211791607199,
175
+ "learning_rate": 5.882352941176471e-05,
176
+ "loss": 0.2274,
177
+ "num_input_tokens_seen": 998400,
178
+ "step": 100
179
+ },
180
+ {
181
+ "epoch": 0.05150656708730363,
182
+ "eval_loss": 0.18663176894187927,
183
+ "eval_runtime": 18.9199,
184
+ "eval_samples_per_second": 3.171,
185
+ "eval_steps_per_second": 0.793,
186
+ "num_input_tokens_seen": 998400,
187
+ "step": 100
188
  }
189
  ],
190
  "logging_steps": 5,
191
  "max_steps": 3400,
192
+ "num_input_tokens_seen": 998400,
193
  "num_train_epochs": 2,
194
  "save_steps": 50,
195
  "stateful_callbacks": {
 
204
  "attributes": {}
205
  }
206
  },
207
+ "total_flos": 65824787529728.0,
208
  "train_batch_size": 1,
209
  "trial_name": null,
210
  "trial_params": null