ben81828 commited on
Commit
8fbf3ce
·
verified ·
1 Parent(s): c1999fa

Training in progress, step 1350, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b44254d9a1623586ea1f5a75bfc6d736cfd5e37677b54824320d6e82460a2035
3
  size 29034840
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5539aab9f718348f04ec41613d234e7bd892b381194398a0eadf2adcfde0ddb
3
  size 29034840
last-checkpoint/global_step1350/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e24af6e1a43be5add6cbc89509eb769890f71ee6fa6b54fe0923600620399dbd
3
+ size 43429616
last-checkpoint/global_step1350/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdc11fd4b339cc5d601d29b4a63f03eb3b4425c4b7691bcb26a63957620b86b5
3
+ size 43429616
last-checkpoint/global_step1350/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:649a3c6c2e41790aba280e8054ec3e4fd58e892c2b27b75d5453836d0575bf49
3
+ size 43429616
last-checkpoint/global_step1350/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c7b9e52dd1f46868c6b578707680d4a3b5d2f23924ee30a2c75ef30b65e6c4b
3
+ size 43429616
last-checkpoint/global_step1350/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf6a04ed575d4bbe95b19e28c451beac968995fe7f81b545d3f82185be8aab33
3
+ size 637299
last-checkpoint/global_step1350/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6797229513b11978627019edbf1729dfa52f86c1151aac9777d9693a2be4536b
3
+ size 637171
last-checkpoint/global_step1350/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3dff6f8ada3bacd5905b06ee331028a57bd1b141fb55f651aca5cbe974a3a95
3
+ size 637171
last-checkpoint/global_step1350/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0cfc18b2fc2a54e8761219cee72f1cb08510e09bfc522456e8dbdebcd2dde79
3
+ size 637171
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step1300
 
1
+ global_step1350
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c364a094b8b4b8d6b015687012206e88b2233dd7d6a4f6f395d7aef77752ea67
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9747fe881253e52a47314f48068ef9649032bec4cb284b1b4becbb8787f37faa
3
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2bd2e297a13ac051fc5f3cce9c34767e51a5cb4574835aa8bd1309d8cdc48053
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ac02a5554a5ef9e3473dcd2926626ae41f4777354859c7d2bf0a0c1188c0583
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:028047dd1753d92e11bf971ee14a5c981a9a3ea6631f228e38475027eb5ae430
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9405d230cc78dac3f3b2ab887674631c15f66fedab0042ab7bc1bd83b8575344
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:000eb4cb4096582f15856f380d5f2a9a00eaecdbb95f2289a7a81a0a624fdf72
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b15237547030ac62d49d70a5465b2e29515e6334f62416eb16c0c6d073f7c6bf
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:522af9421667c7c03e6690f39891bfba59de53aa879f1f8a4c0690d7908aa17c
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5473741dbc1be4510ceec76e5e3cd10aedf1c7667da998c551f8978b6c6c33d3
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.2966395914554596,
3
  "best_model_checkpoint": "saves/CADICA_qwenvl_direction_then_DetectAndClassify_scale6/lora/sft/checkpoint-1200",
4
- "epoch": 0.38404726735598227,
5
  "eval_steps": 50,
6
- "global_step": 1300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2321,11 +2321,100 @@
2321
  "eval_steps_per_second": 0.786,
2322
  "num_input_tokens_seen": 13493264,
2323
  "step": 1300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2324
  }
2325
  ],
2326
  "logging_steps": 5,
2327
  "max_steps": 6770,
2328
- "num_input_tokens_seen": 13493264,
2329
  "num_train_epochs": 2,
2330
  "save_steps": 50,
2331
  "stateful_callbacks": {
@@ -2340,7 +2429,7 @@
2340
  "attributes": {}
2341
  }
2342
  },
2343
- "total_flos": 890156538658816.0,
2344
  "train_batch_size": 1,
2345
  "trial_name": null,
2346
  "trial_params": null
 
1
  {
2
  "best_metric": 0.2966395914554596,
3
  "best_model_checkpoint": "saves/CADICA_qwenvl_direction_then_DetectAndClassify_scale6/lora/sft/checkpoint-1200",
4
+ "epoch": 0.3988183161004431,
5
  "eval_steps": 50,
6
+ "global_step": 1350,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2321
  "eval_steps_per_second": 0.786,
2322
  "num_input_tokens_seen": 13493264,
2323
  "step": 1300
2324
+ },
2325
+ {
2326
+ "epoch": 0.38552437223042835,
2327
+ "grad_norm": 15.50071615139337,
2328
+ "learning_rate": 9.453534651910765e-05,
2329
+ "loss": 0.402,
2330
+ "num_input_tokens_seen": 13545256,
2331
+ "step": 1305
2332
+ },
2333
+ {
2334
+ "epoch": 0.3870014771048744,
2335
+ "grad_norm": 23.183495844663526,
2336
+ "learning_rate": 9.447969751791577e-05,
2337
+ "loss": 0.3075,
2338
+ "num_input_tokens_seen": 13597792,
2339
+ "step": 1310
2340
+ },
2341
+ {
2342
+ "epoch": 0.38847858197932056,
2343
+ "grad_norm": 9.67544956653079,
2344
+ "learning_rate": 9.442378315129455e-05,
2345
+ "loss": 0.3702,
2346
+ "num_input_tokens_seen": 13649848,
2347
+ "step": 1315
2348
+ },
2349
+ {
2350
+ "epoch": 0.38995568685376664,
2351
+ "grad_norm": 2.9059361985914416,
2352
+ "learning_rate": 9.436760375282859e-05,
2353
+ "loss": 0.3603,
2354
+ "num_input_tokens_seen": 13701592,
2355
+ "step": 1320
2356
+ },
2357
+ {
2358
+ "epoch": 0.3914327917282127,
2359
+ "grad_norm": 10.431238621222658,
2360
+ "learning_rate": 9.431115965768358e-05,
2361
+ "loss": 0.4072,
2362
+ "num_input_tokens_seen": 13753064,
2363
+ "step": 1325
2364
+ },
2365
+ {
2366
+ "epoch": 0.3929098966026588,
2367
+ "grad_norm": 11.216612661805582,
2368
+ "learning_rate": 9.425445120260445e-05,
2369
+ "loss": 0.3279,
2370
+ "num_input_tokens_seen": 13805528,
2371
+ "step": 1330
2372
+ },
2373
+ {
2374
+ "epoch": 0.39438700147710487,
2375
+ "grad_norm": 32.22838128750362,
2376
+ "learning_rate": 9.419747872591325e-05,
2377
+ "loss": 0.3754,
2378
+ "num_input_tokens_seen": 13858192,
2379
+ "step": 1335
2380
+ },
2381
+ {
2382
+ "epoch": 0.39586410635155095,
2383
+ "grad_norm": 1.8703742105152936,
2384
+ "learning_rate": 9.414024256750723e-05,
2385
+ "loss": 0.3754,
2386
+ "num_input_tokens_seen": 13910128,
2387
+ "step": 1340
2388
+ },
2389
+ {
2390
+ "epoch": 0.397341211225997,
2391
+ "grad_norm": 5.011302513950015,
2392
+ "learning_rate": 9.408274306885674e-05,
2393
+ "loss": 0.3235,
2394
+ "num_input_tokens_seen": 13962536,
2395
+ "step": 1345
2396
+ },
2397
+ {
2398
+ "epoch": 0.3988183161004431,
2399
+ "grad_norm": 15.197987760428996,
2400
+ "learning_rate": 9.402498057300317e-05,
2401
+ "loss": 0.3731,
2402
+ "num_input_tokens_seen": 14014736,
2403
+ "step": 1350
2404
+ },
2405
+ {
2406
+ "epoch": 0.3988183161004431,
2407
+ "eval_loss": 0.5565826892852783,
2408
+ "eval_runtime": 19.3029,
2409
+ "eval_samples_per_second": 3.108,
2410
+ "eval_steps_per_second": 0.777,
2411
+ "num_input_tokens_seen": 14014736,
2412
+ "step": 1350
2413
  }
2414
  ],
2415
  "logging_steps": 5,
2416
  "max_steps": 6770,
2417
+ "num_input_tokens_seen": 14014736,
2418
  "num_train_epochs": 2,
2419
  "save_steps": 50,
2420
  "stateful_callbacks": {
 
2429
  "attributes": {}
2430
  }
2431
  },
2432
+ "total_flos": 924522107633664.0,
2433
  "train_batch_size": 1,
2434
  "trial_name": null,
2435
  "trial_params": null