ben81828 committed
Commit fabf0a4 · verified · 1 Parent(s): aba3339

Training in progress, step 2000, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d060453114c8a92294690d88c0be250011d18ae38a75a4c22a50983826ea4699
+oid sha256:b27b450bf69f26bb0a80206beeb4dd904291117c93705869b5a8ff84aa1e3f84
 size 18516456
last-checkpoint/global_step1999/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:023f92380397745f8f77ec321aafe2aa87a6575324f089996239b60a3189a75c
+size 27700976
last-checkpoint/global_step1999/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d518f32093035095c01b2c282c562dd35fa1efdb54745f369cbecd811ee1400e
+size 27700976
last-checkpoint/global_step1999/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b99d59e37119ed5eb8569f17f475bf3a623293a6eae349fd2f4effe3f03805e8
+size 27700976
last-checkpoint/global_step1999/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b0ff1a1bccc2cd493f0260ddb9c05a93dbfd5f46956ea28826fe40360f48b4b2
+size 27700976
last-checkpoint/global_step1999/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c54b98763325461b6d9c8e5fe28ccb463f2ecc19bb48a6257a639424b649fb1f
+size 411571
last-checkpoint/global_step1999/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:deca8205cfceb02ab7ae7fc764eaabb2ec9f99c411572adb08a6d62a415e2ea0
+size 411507
last-checkpoint/global_step1999/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e4edec322c3f2cc486eecfd582f74e2c7ea6f1c964c25e64669da8706cb8bc27
+size 411507
last-checkpoint/global_step1999/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a0cf51d7631542144114ab6c24b5854b5b2433aca495109faf0b36df3eb80003
+size 411507
last-checkpoint/latest CHANGED
@@ -1 +1 @@
-global_step1949
+global_step1999
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:76c7a1c4ad326ceb79f3afbc6d47975b14a4cb17c9f8fb7483b37b11ee134aac
+oid sha256:f4a695de1db3382235d3f8ae213672491aa2fdc3ba3be96403a089077ad3c2bf
 size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:364e129a29ed2420756ce71165221396b3418a310a60e2d96548d62cc7590232
+oid sha256:c2025b170fa1d4693537c2d73f89a6495c58940d033678742a74810c0154a6a7
 size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a89d078b36f7a96070fb2b399b9fe9bc0196d5110cb8255158e3354d836845a5
+oid sha256:0865c4d7d921b23a22c91c2f3b2c6cca03dae0eb27c43dee575c9602605c94d6
 size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:10672d33daa64ff34468d947c3c30b17fe906ae6c3d9ace60ba1c3e119c8efa4
+oid sha256:5f4ec19d9df4417359523e8cc4d27875614c1021ebcc6391b27632aa7897b7ea
 size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:83645aef3795b979dc9a5c77e06d9eebfefa4998e2a5da58eb5de75d197fb29f
+oid sha256:f900ab4b5112c328cc663d738cb777b204b3c8bdcbb2ceb91ed099a09d207ee1
 size 1064
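
All of the checkpoint files above are Git LFS pointers: three-line stubs recording `version`, `oid sha256:…`, and `size` rather than the binary payload itself. A minimal sketch for checking a downloaded blob against its pointer, assuming the real files have been fetched (e.g. with `git lfs pull`) and the pointer text is saved alongside them; the paths and helper names below are illustrative, not part of this repo:

```python
# Sketch: verify a downloaded checkpoint blob against its Git LFS pointer.
import hashlib
from pathlib import Path

def read_pointer(pointer_path: str) -> dict:
    """Parse a three-line LFS pointer (version / oid sha256:... / size N)."""
    fields = {}
    for line in Path(pointer_path).read_text().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields

def verify(blob_path: str, pointer_path: str) -> bool:
    fields = read_pointer(pointer_path)
    expected = fields["oid"].removeprefix("sha256:")
    blob = Path(blob_path)
    digest = hashlib.sha256(blob.read_bytes()).hexdigest()
    return digest == expected and blob.stat().st_size == int(fields["size"])

# Hypothetical usage:
# verify("last-checkpoint/adapter_model.safetensors", "adapter_model.pointer")
```
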
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 0.6319106221199036,
   "best_model_checkpoint": "saves/CADICA_qwenvl_stenosis_classily_scale4_frozenVision/lora/sft/checkpoint-1600",
-  "epoch": 1.0041205253669843,
+  "epoch": 1.029873808910636,
   "eval_steps": 50,
-  "global_step": 1950,
+  "global_step": 2000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3478,11 +3478,100 @@
       "eval_steps_per_second": 0.919,
       "num_input_tokens_seen": 22801512,
       "step": 1950
+    },
+    {
+      "epoch": 1.0066958537213495,
+      "grad_norm": 6.506183969602581,
+      "learning_rate": 4.17702704859633e-05,
+      "loss": 0.3784,
+      "num_input_tokens_seen": 22859952,
+      "step": 1955
+    },
+    {
+      "epoch": 1.0092711820757148,
+      "grad_norm": 7.31299798110374,
+      "learning_rate": 4.153052777136879e-05,
+      "loss": 0.5587,
+      "num_input_tokens_seen": 22918440,
+      "step": 1960
+    },
+    {
+      "epoch": 1.0118465104300798,
+      "grad_norm": 4.338872323547646,
+      "learning_rate": 4.1290985361041614e-05,
+      "loss": 0.3803,
+      "num_input_tokens_seen": 22976944,
+      "step": 1965
+    },
+    {
+      "epoch": 1.014421838784445,
+      "grad_norm": 6.798827966152428,
+      "learning_rate": 4.105164892019514e-05,
+      "loss": 0.4038,
+      "num_input_tokens_seen": 23035408,
+      "step": 1970
+    },
+    {
+      "epoch": 1.0169971671388103,
+      "grad_norm": 5.018683403937771,
+      "learning_rate": 4.0812524109171476e-05,
+      "loss": 0.3226,
+      "num_input_tokens_seen": 23093912,
+      "step": 1975
+    },
+    {
+      "epoch": 1.0195724954931753,
+      "grad_norm": 4.594775856201265,
+      "learning_rate": 4.0573616583307705e-05,
+      "loss": 0.4026,
+      "num_input_tokens_seen": 23152344,
+      "step": 1980
+    },
+    {
+      "epoch": 1.0221478238475405,
+      "grad_norm": 7.5346230342964695,
+      "learning_rate": 4.033493199280202e-05,
+      "loss": 0.4225,
+      "num_input_tokens_seen": 23210800,
+      "step": 1985
+    },
+    {
+      "epoch": 1.0247231522019058,
+      "grad_norm": 8.213657673441388,
+      "learning_rate": 4.009647598258022e-05,
+      "loss": 0.3058,
+      "num_input_tokens_seen": 23269304,
+      "step": 1990
+    },
+    {
+      "epoch": 1.0272984805562708,
+      "grad_norm": 6.881744374075897,
+      "learning_rate": 3.985825419216207e-05,
+      "loss": 0.3821,
+      "num_input_tokens_seen": 23327800,
+      "step": 1995
+    },
+    {
+      "epoch": 1.029873808910636,
+      "grad_norm": 3.916989546123924,
+      "learning_rate": 3.962027225552807e-05,
+      "loss": 0.3328,
+      "num_input_tokens_seen": 23386232,
+      "step": 2000
+    },
+    {
+      "epoch": 1.029873808910636,
+      "eval_loss": 0.7484827041625977,
+      "eval_runtime": 16.091,
+      "eval_samples_per_second": 3.729,
+      "eval_steps_per_second": 0.932,
+      "num_input_tokens_seen": 23386232,
+      "step": 2000
     }
   ],
   "logging_steps": 5,
   "max_steps": 3400,
-  "num_input_tokens_seen": 22801512,
+  "num_input_tokens_seen": 23386232,
   "num_train_epochs": 2,
   "save_steps": 50,
   "stateful_callbacks": {
@@ -3497,7 +3586,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1280406731358208.0,
+  "total_flos": 1313241973129216.0,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null