ben81828 commited on
Commit
f21927a
·
verified ·
1 Parent(s): 02bbacd

Training in progress, step 3050, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:75557f39c0933cfb737e5fea18b13474d2e22f2d9438f06b8e2ce77a22c9b865
3
  size 29034840
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5be8bc8351dafe40df5f56b88df2329a278b65ac350dc2b871125aa7c127df5
3
  size 29034840
last-checkpoint/global_step3050/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:408195cc2a75b9d9dec8c11f5a11495193a0ffc68118c11703359aa86321b503
3
+ size 43429616
last-checkpoint/global_step3050/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a510fe5d1eabc5456190aeba27f6e572117e85b7af25c7cc138cc2192fb3ffe
3
+ size 43429616
last-checkpoint/global_step3050/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be5c496609c0feb795d46f180675cc4ee4fbe1d9b8c53c57c1a515f05995e759
3
+ size 43429616
last-checkpoint/global_step3050/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1018e102910e7c1651e3be7364ad138b52b3bd68573277eec5d851de006ace2
3
+ size 43429616
last-checkpoint/global_step3050/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa48e566d49855033cca2fb11b808d251dc1cef85c4e14422e4b7323b7c5cedd
3
+ size 637299
last-checkpoint/global_step3050/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a0b4aeba5ad023bbcf369d3255e9a14871d54ebfb11daaf1310b94fa4ee0c67
3
+ size 637171
last-checkpoint/global_step3050/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9bde9b336c46cc89d1b138ba52bd463c46440720d03ce038f35705b1d36df8f3
3
+ size 637171
last-checkpoint/global_step3050/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93a81bec2d03d08f9cffb54b4dccc79722ba6644873d6d5b4d8c4bb8b402255b
3
+ size 637171
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step3000
 
1
+ global_step3050
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b8ac6af6ac2b73603409d1721537224310f2ce061bc8c1c1c6f959231ed2e31e
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f30f7f3c3620ccd30020e1ea4b81d1a56ee511b742c91370577d55399c14412
3
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:693cfdc542e1c8c319f7052d02602310660cb04e6571aa78525e03834c8b9930
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:440cc1f8e138e1a90606722bd350460b4460991a2f3671f46f880f5743522dca
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8ca85a038ef549f3dd6ca18c0dadc240e57927cad9adbcbd9ff224f3da3ae003
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d78d5df466f5b34e85649b90825d4b168464f6d49c668313415473184409b799
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1c2a98b011c36f42f0d59506d21a897e487e2865babe045af4934af8b2748950
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d8be6df9e66c8bba407df289d2aa6b4422668727a1e2419037de032213393a4
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4494fc1ba6eef0a9379f34c371e4e34294ca2139350203f5fe5a9d90eff98549
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3da17ebe605411b58ea2b644f619dba9557cc5aeaf2c6d740ea8353d5ec2fab3
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.1869634985923767,
3
  "best_model_checkpoint": "saves/CADICA_qwenvl_direction_then_DetectAndClassify_scale6/lora/sft/checkpoint-2350",
4
- "epoch": 0.8862629246676514,
5
  "eval_steps": 50,
6
- "global_step": 3000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -5347,11 +5347,100 @@
5347
  "eval_steps_per_second": 0.774,
5348
  "num_input_tokens_seen": 31159736,
5349
  "step": 3000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5350
  }
5351
  ],
5352
  "logging_steps": 5,
5353
  "max_steps": 6770,
5354
- "num_input_tokens_seen": 31159736,
5355
  "num_train_epochs": 2,
5356
  "save_steps": 50,
5357
  "stateful_callbacks": {
@@ -5366,7 +5455,7 @@
5366
  "attributes": {}
5367
  }
5368
  },
5369
- "total_flos": 2055677932208128.0,
5370
  "train_batch_size": 1,
5371
  "trial_name": null,
5372
  "trial_params": null
 
1
  {
2
  "best_metric": 0.1869634985923767,
3
  "best_model_checkpoint": "saves/CADICA_qwenvl_direction_then_DetectAndClassify_scale6/lora/sft/checkpoint-2350",
4
+ "epoch": 0.9010339734121122,
5
  "eval_steps": 50,
6
+ "global_step": 3050,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
5347
  "eval_steps_per_second": 0.774,
5348
  "num_input_tokens_seen": 31159736,
5349
  "step": 3000
5350
+ },
5351
+ {
5352
+ "epoch": 0.8877400295420975,
5353
+ "grad_norm": 1.2324370262682327,
5354
+ "learning_rate": 6.326113804772157e-05,
5355
+ "loss": 0.2011,
5356
+ "num_input_tokens_seen": 31211632,
5357
+ "step": 3005
5358
+ },
5359
+ {
5360
+ "epoch": 0.8892171344165436,
5361
+ "grad_norm": 1.7142403977850047,
5362
+ "learning_rate": 6.314334540712983e-05,
5363
+ "loss": 0.2197,
5364
+ "num_input_tokens_seen": 31264376,
5365
+ "step": 3010
5366
+ },
5367
+ {
5368
+ "epoch": 0.8906942392909897,
5369
+ "grad_norm": 1.50601762442169,
5370
+ "learning_rate": 6.302547435347122e-05,
5371
+ "loss": 0.1853,
5372
+ "num_input_tokens_seen": 31316584,
5373
+ "step": 3015
5374
+ },
5375
+ {
5376
+ "epoch": 0.8921713441654358,
5377
+ "grad_norm": 1.2451326697290692,
5378
+ "learning_rate": 6.290752558996325e-05,
5379
+ "loss": 0.2312,
5380
+ "num_input_tokens_seen": 31367768,
5381
+ "step": 3020
5382
+ },
5383
+ {
5384
+ "epoch": 0.8936484490398818,
5385
+ "grad_norm": 1.3816673419860452,
5386
+ "learning_rate": 6.278949982028704e-05,
5387
+ "loss": 0.2608,
5388
+ "num_input_tokens_seen": 31419664,
5389
+ "step": 3025
5390
+ },
5391
+ {
5392
+ "epoch": 0.8951255539143279,
5393
+ "grad_norm": 1.430852186731335,
5394
+ "learning_rate": 6.267139774858318e-05,
5395
+ "loss": 0.225,
5396
+ "num_input_tokens_seen": 31471672,
5397
+ "step": 3030
5398
+ },
5399
+ {
5400
+ "epoch": 0.896602658788774,
5401
+ "grad_norm": 22.943001206859204,
5402
+ "learning_rate": 6.255322007944743e-05,
5403
+ "loss": 0.225,
5404
+ "num_input_tokens_seen": 31523888,
5405
+ "step": 3035
5406
+ },
5407
+ {
5408
+ "epoch": 0.8980797636632201,
5409
+ "grad_norm": 0.8776969753848171,
5410
+ "learning_rate": 6.243496751792658e-05,
5411
+ "loss": 0.2291,
5412
+ "num_input_tokens_seen": 31574992,
5413
+ "step": 3040
5414
+ },
5415
+ {
5416
+ "epoch": 0.8995568685376661,
5417
+ "grad_norm": 0.8040234911638864,
5418
+ "learning_rate": 6.231664076951421e-05,
5419
+ "loss": 0.1926,
5420
+ "num_input_tokens_seen": 31627608,
5421
+ "step": 3045
5422
+ },
5423
+ {
5424
+ "epoch": 0.9010339734121122,
5425
+ "grad_norm": 1.336878225583922,
5426
+ "learning_rate": 6.219824054014656e-05,
5427
+ "loss": 0.2303,
5428
+ "num_input_tokens_seen": 31679080,
5429
+ "step": 3050
5430
+ },
5431
+ {
5432
+ "epoch": 0.9010339734121122,
5433
+ "eval_loss": 0.22528553009033203,
5434
+ "eval_runtime": 19.2502,
5435
+ "eval_samples_per_second": 3.117,
5436
+ "eval_steps_per_second": 0.779,
5437
+ "num_input_tokens_seen": 31679080,
5438
+ "step": 3050
5439
  }
5440
  ],
5441
  "logging_steps": 5,
5442
  "max_steps": 6770,
5443
+ "num_input_tokens_seen": 31679080,
5444
  "num_train_epochs": 2,
5445
  "save_steps": 50,
5446
  "stateful_callbacks": {
 
5455
  "attributes": {}
5456
  }
5457
  },
5458
+ "total_flos": 2089927830929408.0,
5459
  "train_batch_size": 1,
5460
  "trial_name": null,
5461
  "trial_params": null