ben81828 committed on
Commit
3983024
·
verified ·
1 Parent(s): 8d10e80

Training in progress, step 2250, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:76b20ab8c2a7403c32454801b8a1cf7e477efa58783a51bc7e3abf420b274c08
3
  size 18516456
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fef821b7ed03f4855afe73a282dfe1eaf891fe7aec11dbae66d77bf1e75802cb
3
  size 18516456
last-checkpoint/global_step2249/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4e0e15067425dd19e23913af5cadb71f7c0ce8e8e4aa92ebeb26f3b70866e88
3
+ size 27700976
last-checkpoint/global_step2249/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65544af251b9481248990fe36ee789bde1d9e030cee2b6d5d7e33daf332b7c3e
3
+ size 27700976
last-checkpoint/global_step2249/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80689559aea7baa93195feb4e4c40c89aff9367fd678be7d88a6c66663b38ed3
3
+ size 27700976
last-checkpoint/global_step2249/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c4abb06aa79e8fe0825c5ad8ff77c8bdc03a26fc1a37371992d32f819b63e2f
3
+ size 27700976
last-checkpoint/global_step2249/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:abd223d5575f1d4caa0358b7085f8743404ec64526534fd4e13f730325601388
3
+ size 411571
last-checkpoint/global_step2249/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:299465c58a3533bd287fde01ba5114f5a6421d904e8715e5cac80ccde8331aff
3
+ size 411507
last-checkpoint/global_step2249/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6de7e2553a35e5a51c605253c768cde53f5c97a670676902669fa198173d4cd
3
+ size 411507
last-checkpoint/global_step2249/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94c37d01755a8d68a4d6b72f9db4088b481a4af1fcfe1e3f6f7bc555daa306d4
3
+ size 411507
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step2199
 
1
+ global_step2249
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3187a61ccc2722c440dc24ae4a6eefe6b9e5daccf9e92473bbb4483c7751ea77
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d25cbcbbaa0866ea9c7365cb49b84e805db119693e615f5a1898a6ebfe997e8
3
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b0f2a0df922fb3337cf2562745ebe8d5adf433ca45cb4e3da33a21b48183c000
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a883389afac12125c2c6bf62631b7de0220fdb0020d24cd0c6e8f8858dd3b362
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f4d84b5276f687f44c9af60b1e41cd7b93a6d1659e36831a7bc021b5635d663b
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:577d49de6d60035e159d9ebb1e6eabef79a55787b14ecea93a6a93c242661779
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7d69159433c88b97106cf21b92eb5a3f66f0c826aa268d82a47b3faed1ac86cd
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5f2dd1c21e06806a9ce39eeab45734dfb8a62b829f91a86d1f65f13102d6242
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6225488c9a450b7edfa6b28ac40ecd217bccdb84073c98a64aefcefa7ee337d2
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6909ad505e808e7099dbcdd8062e5535575cbfa3b4d3a7b7d3390e6a93ed3b49
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.6319106221199036,
3
  "best_model_checkpoint": "saves/CADICA_qwenvl_stenosis_classily_scale4_frozenVision/lora/sft/checkpoint-1600",
4
- "epoch": 1.1328869430852433,
5
  "eval_steps": 50,
6
- "global_step": 2200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3923,11 +3923,100 @@
3923
  "eval_steps_per_second": 0.939,
3924
  "num_input_tokens_seen": 25725560,
3925
  "step": 2200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3926
  }
3927
  ],
3928
  "logging_steps": 5,
3929
  "max_steps": 3400,
3930
- "num_input_tokens_seen": 25725560,
3931
  "num_train_epochs": 2,
3932
  "save_steps": 50,
3933
  "stateful_callbacks": {
@@ -3942,7 +4031,7 @@
3942
  "attributes": {}
3943
  }
3944
  },
3945
- "total_flos": 1444609144651776.0,
3946
  "train_batch_size": 1,
3947
  "trial_name": null,
3948
  "trial_params": null
 
1
  {
2
  "best_metric": 0.6319106221199036,
3
  "best_model_checkpoint": "saves/CADICA_qwenvl_stenosis_classily_scale4_frozenVision/lora/sft/checkpoint-1600",
4
+ "epoch": 1.158640226628895,
5
  "eval_steps": 50,
6
+ "global_step": 2250,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3923
  "eval_steps_per_second": 0.939,
3924
  "num_input_tokens_seen": 25725560,
3925
  "step": 2200
3926
+ },
3927
+ {
3928
+ "epoch": 1.1354622714396085,
3929
+ "grad_norm": 4.616007617470174,
3930
+ "learning_rate": 3.0138142386977787e-05,
3931
+ "loss": 0.3465,
3932
+ "num_input_tokens_seen": 25784048,
3933
+ "step": 2205
3934
+ },
3935
+ {
3936
+ "epoch": 1.1380375997939738,
3937
+ "grad_norm": 4.752551024155875,
3938
+ "learning_rate": 2.991522876735154e-05,
3939
+ "loss": 0.3077,
3940
+ "num_input_tokens_seen": 25842512,
3941
+ "step": 2210
3942
+ },
3943
+ {
3944
+ "epoch": 1.140612928148339,
3945
+ "grad_norm": 6.021213921198953,
3946
+ "learning_rate": 2.9692790155527227e-05,
3947
+ "loss": 0.4497,
3948
+ "num_input_tokens_seen": 25900992,
3949
+ "step": 2215
3950
+ },
3951
+ {
3952
+ "epoch": 1.143188256502704,
3953
+ "grad_norm": 8.098592782255322,
3954
+ "learning_rate": 2.9470831812210837e-05,
3955
+ "loss": 0.3811,
3956
+ "num_input_tokens_seen": 25959448,
3957
+ "step": 2220
3958
+ },
3959
+ {
3960
+ "epoch": 1.1457635848570693,
3961
+ "grad_norm": 6.108837560432838,
3962
+ "learning_rate": 2.924935898674992e-05,
3963
+ "loss": 0.4053,
3964
+ "num_input_tokens_seen": 26017936,
3965
+ "step": 2225
3966
+ },
3967
+ {
3968
+ "epoch": 1.1483389132114346,
3969
+ "grad_norm": 7.709937017464705,
3970
+ "learning_rate": 2.902837691700945e-05,
3971
+ "loss": 0.3421,
3972
+ "num_input_tokens_seen": 26076440,
3973
+ "step": 2230
3974
+ },
3975
+ {
3976
+ "epoch": 1.1509142415657996,
3977
+ "grad_norm": 3.840146275079161,
3978
+ "learning_rate": 2.880789082924798e-05,
3979
+ "loss": 0.3228,
3980
+ "num_input_tokens_seen": 26134896,
3981
+ "step": 2235
3982
+ },
3983
+ {
3984
+ "epoch": 1.1534895699201648,
3985
+ "grad_norm": 6.088757703790803,
3986
+ "learning_rate": 2.858790593799405e-05,
3987
+ "loss": 0.3695,
3988
+ "num_input_tokens_seen": 26193368,
3989
+ "step": 2240
3990
+ },
3991
+ {
3992
+ "epoch": 1.15606489827453,
3993
+ "grad_norm": 3.8647543120940844,
3994
+ "learning_rate": 2.8368427445922696e-05,
3995
+ "loss": 0.3463,
3996
+ "num_input_tokens_seen": 26251848,
3997
+ "step": 2245
3998
+ },
3999
+ {
4000
+ "epoch": 1.158640226628895,
4001
+ "grad_norm": 4.425454601086007,
4002
+ "learning_rate": 2.8149460543732664e-05,
4003
+ "loss": 0.3442,
4004
+ "num_input_tokens_seen": 26310336,
4005
+ "step": 2250
4006
+ },
4007
+ {
4008
+ "epoch": 1.158640226628895,
4009
+ "eval_loss": 0.7066138386726379,
4010
+ "eval_runtime": 15.9558,
4011
+ "eval_samples_per_second": 3.76,
4012
+ "eval_steps_per_second": 0.94,
4013
+ "num_input_tokens_seen": 26310336,
4014
+ "step": 2250
4015
  }
4016
  ],
4017
  "logging_steps": 5,
4018
  "max_steps": 3400,
4019
+ "num_input_tokens_seen": 26310336,
4020
  "num_train_epochs": 2,
4021
  "save_steps": 50,
4022
  "stateful_callbacks": {
 
4031
  "attributes": {}
4032
  }
4033
  },
4034
+ "total_flos": 1477446954123264.0,
4035
  "train_batch_size": 1,
4036
  "trial_name": null,
4037
  "trial_params": null