ben81828 commited on
Commit
521fad5
·
verified ·
1 Parent(s): 839412c

Training in progress, step 600, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:505052fd447e82a4abc24247799308483adb5e768f191f23677c8d33b4d7cac4
3
  size 29034840
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:085286a0e6090c82fcc13eea53f92c3af07a3dd530dc3c67c22de5abfe705112
3
  size 29034840
last-checkpoint/global_step600/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:42bb6e0db8e9586a2cfdb34d2bc2ea4b1fca24d5512ed2cc6a94dfc88d621a29
3
+ size 43429616
last-checkpoint/global_step600/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68ad9c72f07a7ba17f01f73cc06f73f462dc760207c5ffe74b0e4e37e3b5a4fb
3
+ size 43429616
last-checkpoint/global_step600/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8654e95a1ad4a4e79b82252bc9462e6f0cd4bcb0c77bc24fcfc846e3563c2085
3
+ size 43429616
last-checkpoint/global_step600/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83efdde6fa36dd455f51fe862e83e81798338058d0a451d3c5c54baf4da80d7d
3
+ size 43429616
last-checkpoint/global_step600/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11e8027e9407df0ac39e2b3b0f9b391ceffcc0365bf7b524d551f5dbe3c76e79
3
+ size 637299
last-checkpoint/global_step600/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c20dbc40aa3a9738159e35e517f2b9b468c6b1bee8cb810efadc972e0821f0fc
3
+ size 637171
last-checkpoint/global_step600/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:585439023d752dde2525d7bdccd458f3fb79989825110ba63e0cce828093c299
3
+ size 637171
last-checkpoint/global_step600/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e197c45213ad104a71f307e0f73afc54979e8843c65bd3e2a9fc618c43b98f06
3
+ size 637171
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step550
 
1
+ global_step600
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ae78313eb528c8d3695eebaf4de3539bd0a0bc6ee18c66af1ee183442f1758a0
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a81e3916b1392c4c49afb171dee5415c15f5a5a5af8749b28195fcfa0596699c
3
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1b38031f60d9e88601d369ef46bcdcf2b5b03f2cb4ba93853bcb2328df7ebb7c
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a781038dd714b87b8adb1aac8dbc8217ceb607428a992133954ad522365236e
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f58092375c93d237cd0e3149aecfbf83e2acdae46279e07a32920d01cb507e64
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9446c3db15f382a5546f13622787fc99392a5e0bc8a9ca2da1838de7ab621a37
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:83cd4bbff9962da7ec6787fcea8d65df7096917f9a5902e249ba7aee8887fe5f
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f11e7a6b3faa884fc23044e3772ff9dd72c257f02e121665061e2a03d518bd9
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6e8b881f6464ee76e192f8a5dbebbec89a38d087d3502270b9c7e6038613f3b2
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b76b388bede074656df32b92902ac42b965557bfee0c930366af07d8382b1b4a
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.0033526704646646976,
3
  "best_model_checkpoint": "saves/CADICA_qwenvl_direction_scale4/lora/sft/checkpoint-550",
4
- "epoch": 0.28328611898017,
5
  "eval_steps": 50,
6
- "global_step": 550,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -986,11 +986,100 @@
986
  "eval_steps_per_second": 0.796,
987
  "num_input_tokens_seen": 5491200,
988
  "step": 550
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
989
  }
990
  ],
991
  "logging_steps": 5,
992
  "max_steps": 3400,
993
- "num_input_tokens_seen": 5491200,
994
  "num_train_epochs": 2,
995
  "save_steps": 50,
996
  "stateful_callbacks": {
@@ -1005,7 +1094,7 @@
1005
  "attributes": {}
1006
  }
1007
  },
1008
- "total_flos": 362284603670528.0,
1009
  "train_batch_size": 1,
1010
  "trial_name": null,
1011
  "trial_params": null
 
1
  {
2
  "best_metric": 0.0033526704646646976,
3
  "best_model_checkpoint": "saves/CADICA_qwenvl_direction_scale4/lora/sft/checkpoint-550",
4
+ "epoch": 0.3090394025238218,
5
  "eval_steps": 50,
6
+ "global_step": 600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
986
  "eval_steps_per_second": 0.796,
987
  "num_input_tokens_seen": 5491200,
988
  "step": 550
989
+ },
990
+ {
991
+ "epoch": 0.28586144733453517,
992
+ "grad_norm": 0.05427488923085365,
993
+ "learning_rate": 9.653522193117013e-05,
994
+ "loss": 0.0341,
995
+ "num_input_tokens_seen": 5541120,
996
+ "step": 555
997
+ },
998
+ {
999
+ "epoch": 0.28843677568890036,
1000
+ "grad_norm": 1.538376531568411,
1001
+ "learning_rate": 9.644573182726035e-05,
1002
+ "loss": 0.0341,
1003
+ "num_input_tokens_seen": 5591040,
1004
+ "step": 560
1005
+ },
1006
+ {
1007
+ "epoch": 0.2910121040432655,
1008
+ "grad_norm": 0.04402865983757039,
1009
+ "learning_rate": 9.63551432749426e-05,
1010
+ "loss": 0.0453,
1011
+ "num_input_tokens_seen": 5640960,
1012
+ "step": 565
1013
+ },
1014
+ {
1015
+ "epoch": 0.2935874323976307,
1016
+ "grad_norm": 0.1469885874710427,
1017
+ "learning_rate": 9.626345841664953e-05,
1018
+ "loss": 0.0227,
1019
+ "num_input_tokens_seen": 5690880,
1020
+ "step": 570
1021
+ },
1022
+ {
1023
+ "epoch": 0.2961627607519959,
1024
+ "grad_norm": 0.3459204695983954,
1025
+ "learning_rate": 9.617067942074153e-05,
1026
+ "loss": 0.0135,
1027
+ "num_input_tokens_seen": 5740800,
1028
+ "step": 575
1029
+ },
1030
+ {
1031
+ "epoch": 0.29873808910636107,
1032
+ "grad_norm": 1.7604264611882452,
1033
+ "learning_rate": 9.607680848145558e-05,
1034
+ "loss": 0.0439,
1035
+ "num_input_tokens_seen": 5790720,
1036
+ "step": 580
1037
+ },
1038
+ {
1039
+ "epoch": 0.30131341746072626,
1040
+ "grad_norm": 2.1661707703583204,
1041
+ "learning_rate": 9.598184781885318e-05,
1042
+ "loss": 0.0344,
1043
+ "num_input_tokens_seen": 5840640,
1044
+ "step": 585
1045
+ },
1046
+ {
1047
+ "epoch": 0.3038887458150914,
1048
+ "grad_norm": 0.19674934066326408,
1049
+ "learning_rate": 9.588579967876806e-05,
1050
+ "loss": 0.0244,
1051
+ "num_input_tokens_seen": 5890560,
1052
+ "step": 590
1053
+ },
1054
+ {
1055
+ "epoch": 0.3064640741694566,
1056
+ "grad_norm": 4.463551986915384,
1057
+ "learning_rate": 9.578866633275288e-05,
1058
+ "loss": 0.022,
1059
+ "num_input_tokens_seen": 5940480,
1060
+ "step": 595
1061
+ },
1062
+ {
1063
+ "epoch": 0.3090394025238218,
1064
+ "grad_norm": 0.08358626804613765,
1065
+ "learning_rate": 9.569045007802559e-05,
1066
+ "loss": 0.0217,
1067
+ "num_input_tokens_seen": 5990400,
1068
+ "step": 600
1069
+ },
1070
+ {
1071
+ "epoch": 0.3090394025238218,
1072
+ "eval_loss": 0.013290103524923325,
1073
+ "eval_runtime": 18.8576,
1074
+ "eval_samples_per_second": 3.182,
1075
+ "eval_steps_per_second": 0.795,
1076
+ "num_input_tokens_seen": 5990400,
1077
+ "step": 600
1078
  }
1079
  ],
1080
  "logging_steps": 5,
1081
  "max_steps": 3400,
1082
+ "num_input_tokens_seen": 5990400,
1083
  "num_train_epochs": 2,
1084
  "save_steps": 50,
1085
  "stateful_callbacks": {
 
1094
  "attributes": {}
1095
  }
1096
  },
1097
+ "total_flos": 395224583241728.0,
1098
  "train_batch_size": 1,
1099
  "trial_name": null,
1100
  "trial_params": null