ben81828 committed · commit 2d6bed0 · verified · 1 parent: 0c4bade

Training in progress, step 1300, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:49efabf09713516088de3465883ff1d8943c4d5b7de6e932ffa50531df3a26bb
+ oid sha256:b44254d9a1623586ea1f5a75bfc6d736cfd5e37677b54824320d6e82460a2035
  size 29034840
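The entry above is a Git LFS pointer file: the repository tracks only the object's sha256 and byte size, while the actual weights are fetched from LFS storage. A minimal sketch (assuming the adapter file has already been downloaded under last-checkpoint/; the path and helper name are illustrative, not repo code) for checking a download against the pointer recorded in this commit:

import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    # Stream the file so large checkpoints do not need to fit in memory.
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

# Digest copied from the "+ oid sha256:..." line of this diff.
expected = "b44254d9a1623586ea1f5a75bfc6d736cfd5e37677b54824320d6e82460a2035"
assert sha256_of("last-checkpoint/adapter_model.safetensors") == expected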
last-checkpoint/global_step1300/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f9c1a42e79721b37ac6048cc8aca57576dc233426bad9e66fd95aa202906a2fc
+ size 43429616
last-checkpoint/global_step1300/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5700ea0d55977b1676ff4fa5914079f8829077024f21b263faa2c7809bab5667
+ size 43429616
last-checkpoint/global_step1300/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:525b229e937c504e8385a2311ce4e52be4f35717e844a6752cbf30818159facd
+ size 43429616
last-checkpoint/global_step1300/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:380d00ce28f33e1589c1df8f6f7f7ff5276b46ffb9dc71ba85553eb6f4f98bc1
+ size 43429616
last-checkpoint/global_step1300/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e38c4177c1b3c208cadab4e11227a8fda48618f1e7f34460d4a982c52358ed8b
+ size 637299
last-checkpoint/global_step1300/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b28c5fffe2b2275164aba9685d890dd7267851d7800fec9fb46b7ed118a21acf
+ size 637171
last-checkpoint/global_step1300/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:68484b426a7d568a8d268de575beee45dfd1d392aeea48ac22ac8249d9460646
+ size 637171
last-checkpoint/global_step1300/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:edfb359ed776cd63c27b035d670e3798d4487e045fd2576d5beff80cf431925f
+ size 637171
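The new global_step1300/ directory follows DeepSpeed's ZeRO checkpoint layout: one bf16_zero_pp_rank_{N}_mp_rank_00_optim_states.pt per data-parallel rank holding that rank's partition of the optimizer state, and one zero_pp_rank_{N}_mp_rank_00_model_states.pt per rank holding its model states (four ranks here). A minimal sketch, assuming PyTorch is installed and the LFS blobs have been pulled, for peeking at one shard's top-level structure:

import torch

shard = "last-checkpoint/global_step1300/zero_pp_rank_0_mp_rank_00_model_states.pt"
# weights_only=False because these shards are full pickled DeepSpeed state dicts,
# not plain tensor files (recent PyTorch defaults to weights_only=True).
state = torch.load(shard, map_location="cpu", weights_only=False)
print(type(state))
if isinstance(state, dict):
    print(sorted(state.keys()))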
last-checkpoint/latest CHANGED
@@ -1 +1 @@
- global_step1250
+ global_step1300
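latest is a one-line marker naming the newest ZeRO step directory, so a resume from last-checkpoint now lands on global_step1300. A minimal sketch of how that marker can be resolved (illustrative only, not repo code):

from pathlib import Path

ckpt = Path("last-checkpoint")
tag = (ckpt / "latest").read_text().strip()  # "global_step1300" after this commit
print(ckpt / tag)  # last-checkpoint/global_step1300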
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:15bbef9a4c878a05b4f189e9f77701a153dc9faf093499714094cb36ac0ca030
+ oid sha256:c364a094b8b4b8d6b015687012206e88b2233dd7d6a4f6f395d7aef77752ea67
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:7a7122760cfda3f3e13eac4f7e56d09b0fd3beac4bd49b2f0e58da9519469ff9
+ oid sha256:2bd2e297a13ac051fc5f3cce9c34767e51a5cb4574835aa8bd1309d8cdc48053
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:71edf044d4ea96e62ffecab515d42b58ae56cd64abd0092a897468e0e0d7c10f
+ oid sha256:028047dd1753d92e11bf971ee14a5c981a9a3ea6631f228e38475027eb5ae430
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:ef5ca30351deca9d83675a183b2186ef346093f56e0d5a6dacdf62ef921f12c2
+ oid sha256:000eb4cb4096582f15856f380d5f2a9a00eaecdbb95f2289a7a81a0a624fdf72
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:88c4e20232245763c3fa9c7256563263254aa7f4fcf1acb3e46226c0466eab5e
+ oid sha256:522af9421667c7c03e6690f39891bfba59de53aa879f1f8a4c0690d7908aa17c
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": 0.2966395914554596,
  "best_model_checkpoint": "saves/CADICA_qwenvl_direction_then_DetectAndClassify_scale6/lora/sft/checkpoint-1200",
- "epoch": 0.36927621861152143,
+ "epoch": 0.38404726735598227,
  "eval_steps": 50,
- "global_step": 1250,
+ "global_step": 1300,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -2232,11 +2232,100 @@
  "eval_steps_per_second": 0.792,
  "num_input_tokens_seen": 12972696,
  "step": 1250
+ },
+ {
+ "epoch": 0.3707533234859675,
+ "grad_norm": 2.1850732370217045,
+ "learning_rate": 9.507716917831099e-05,
+ "loss": 0.3242,
+ "num_input_tokens_seen": 13025280,
+ "step": 1255
+ },
+ {
+ "epoch": 0.3722304283604136,
+ "grad_norm": 7.470465465497159,
+ "learning_rate": 9.50241918292927e-05,
+ "loss": 0.4083,
+ "num_input_tokens_seen": 13075992,
+ "step": 1260
+ },
+ {
+ "epoch": 0.37370753323485967,
+ "grad_norm": 10.134768151698713,
+ "learning_rate": 9.49709458663975e-05,
+ "loss": 0.4043,
+ "num_input_tokens_seen": 13128592,
+ "step": 1265
+ },
+ {
+ "epoch": 0.37518463810930575,
+ "grad_norm": 7.635543650225297,
+ "learning_rate": 9.491743160729026e-05,
+ "loss": 0.3481,
+ "num_input_tokens_seen": 13181824,
+ "step": 1270
+ },
+ {
+ "epoch": 0.3766617429837518,
+ "grad_norm": 17.089924601510244,
+ "learning_rate": 9.486364937123651e-05,
+ "loss": 0.4121,
+ "num_input_tokens_seen": 13233624,
+ "step": 1275
+ },
+ {
+ "epoch": 0.37813884785819796,
+ "grad_norm": 8.52905916993994,
+ "learning_rate": 9.480959947910055e-05,
+ "loss": 0.487,
+ "num_input_tokens_seen": 13285808,
+ "step": 1280
+ },
+ {
+ "epoch": 0.37961595273264404,
+ "grad_norm": 11.841989523288227,
+ "learning_rate": 9.47552822533435e-05,
+ "loss": 0.3798,
+ "num_input_tokens_seen": 13337864,
+ "step": 1285
+ },
+ {
+ "epoch": 0.3810930576070901,
+ "grad_norm": 2.1853711175575734,
+ "learning_rate": 9.470069801802135e-05,
+ "loss": 0.348,
+ "num_input_tokens_seen": 13390544,
+ "step": 1290
+ },
+ {
+ "epoch": 0.3825701624815362,
+ "grad_norm": 2.9516647949035826,
+ "learning_rate": 9.464584709878313e-05,
+ "loss": 0.41,
+ "num_input_tokens_seen": 13441664,
+ "step": 1295
+ },
+ {
+ "epoch": 0.38404726735598227,
+ "grad_norm": 3.7764410954952514,
+ "learning_rate": 9.459072982286886e-05,
+ "loss": 0.3594,
+ "num_input_tokens_seen": 13493264,
+ "step": 1300
+ },
+ {
+ "epoch": 0.38404726735598227,
+ "eval_loss": 0.4715976417064667,
+ "eval_runtime": 19.0919,
+ "eval_samples_per_second": 3.143,
+ "eval_steps_per_second": 0.786,
+ "num_input_tokens_seen": 13493264,
+ "step": 1300
  }
  ],
  "logging_steps": 5,
  "max_steps": 6770,
- "num_input_tokens_seen": 12972696,
+ "num_input_tokens_seen": 13493264,
  "num_train_epochs": 2,
  "save_steps": 50,
  "stateful_callbacks": {
@@ -2251,7 +2340,7 @@
  "attributes": {}
  }
  },
- "total_flos": 855825716609024.0,
+ "total_flos": 890156538658816.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null