Commit 79e43c5
3v324v23 committed on
1 Parent(s): 9d5e393
iter_8721.pth/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b2f5a84387c6351ccf3016f7558b040d98ddaf8b334326cfa6b264d55ba59129
+size 19676784
iter_8721.pth/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b779a7d3003162fefdb0361c75f340f6d0e2e73f50b6d0bec8e95c4e180e74c9
+size 19676912
iter_8721.pth/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:67a083fe2a0f23b7307593f8be03f4865d8e198b884dfb14cf78cc0530ff6a7b
+size 19676784
iter_8721.pth/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e45c1d8bdac4984bc0c4b7aad3ebe14dc73145337ffd9c4bd7d4f82016c0d82b
+size 19676848
iter_8721.pth/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d1ab247dc95e126f4b705a008e5e2d5a0e9e49588c38101a8d8a7844705e63e5
+size 13767724
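The five files above make up a DeepSpeed ZeRO checkpoint: four bf16 ZeRO-2 optimizer-state shards (one per data-parallel rank) plus the rank-0 model states, each stored as a Git LFS pointer. Below is a minimal sketch, not taken from this repo, of how such a shard set could be consolidated into a single state dict after downloading, assuming DeepSpeed's zero_to_fp32 helper and that iter_8721.pth/ sits in the current working directory (both the path and the tag are assumptions).

from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint

# Sketch only: '.' is the directory that contains iter_8721.pth/, and the tag
# names that checkpoint folder; adjust both to the local download location.
state_dict = get_fp32_state_dict_from_zero_checkpoint('.', tag='iter_8721.pth')
print(sorted(state_dict.keys())[:5])  # inspect the recovered parameter names

Since the training strategy in pretrain.py sets exclude_frozen_parameters=True (with the LLM and visual encoder frozen), only the unfrozen parameters would be present in these shards.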
pretrain.py ADDED
@@ -0,0 +1,202 @@
+SYSTEM = ''
+accumulative_counts = 4
+batch_size = 16
+betas = (
+    0.9,
+    0.999,
+)
+custom_hooks = [
+    dict(
+        tokenizer=dict(
+            padding_side='right',
+            pretrained_model_name_or_path='internlm/internlm2-chat-1_8b',
+            trust_remote_code=True,
+            type='transformers.AutoTokenizer.from_pretrained'),
+        type='xtuner.engine.hooks.DatasetInfoHook'),
+    dict(
+        evaluation_images='https://llava-vl.github.io/static/images/view.jpg',
+        evaluation_inputs=[
+            '请描述一下这张照片',
+            'Please describe this picture',
+        ],
+        every_n_iters=2000,
+        image_processor=dict(
+            pretrained_model_name_or_path='google/siglip-so400m-patch14-384',
+            trust_remote_code=True,
+            type='transformers.SiglipImageProcessor.from_pretrained'),
+        prompt_template='xtuner.utils.PROMPT_TEMPLATE.internlm2_chat',
+        system='',
+        tokenizer=dict(
+            padding_side='right',
+            pretrained_model_name_or_path='internlm/internlm2-chat-1_8b',
+            trust_remote_code=True,
+            type='transformers.AutoTokenizer.from_pretrained'),
+        type='xtuner.engine.hooks.EvaluateChatHook'),
+]
+data_path = './LLaVA-Pretrain/blip_laion_cc_sbu_558k.json'
+data_root = './'
+dataloader_num_workers = 16
+default_hooks = dict(
+    checkpoint=dict(
+        by_epoch=False,
+        interval=2000,
+        max_keep_ckpts=2,
+        type='mmengine.hooks.CheckpointHook'),
+    logger=dict(
+        interval=10,
+        log_metric_by_epoch=False,
+        type='mmengine.hooks.LoggerHook'),
+    param_scheduler=dict(type='mmengine.hooks.ParamSchedulerHook'),
+    sampler_seed=dict(type='mmengine.hooks.DistSamplerSeedHook'),
+    timer=dict(type='mmengine.hooks.IterTimerHook'))
+env_cfg = dict(
+    cudnn_benchmark=False,
+    dist_cfg=dict(backend='nccl'),
+    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0))
+evaluation_freq = 2000
+evaluation_images = 'https://llava-vl.github.io/static/images/view.jpg'
+evaluation_inputs = [
+    '请描述一下这张照片',
+    'Please describe this picture',
+]
+image_folder = './LLaVA-Pretrain/images'
+image_processor = dict(
+    pretrained_model_name_or_path='google/siglip-so400m-patch14-384',
+    trust_remote_code=True,
+    type='transformers.SiglipImageProcessor.from_pretrained')
+launcher = 'pytorch'
+llava_dataset = dict(
+    data_path='./LLaVA-Pretrain/blip_laion_cc_sbu_558k.json',
+    dataset_map_fn='xtuner.dataset.map_fns.llava_map_fn',
+    image_folder='./LLaVA-Pretrain/images',
+    image_processor=dict(
+        pretrained_model_name_or_path='google/siglip-so400m-patch14-384',
+        trust_remote_code=True,
+        type='transformers.SiglipImageProcessor.from_pretrained'),
+    max_length=1472,
+    pad_image_to_square=False,
+    template_map_fn=dict(
+        template='xtuner.utils.PROMPT_TEMPLATE.internlm2_chat',
+        type='xtuner.dataset.map_fns.template_map_fn_factory'),
+    tokenizer=dict(
+        padding_side='right',
+        pretrained_model_name_or_path='internlm/internlm2-chat-1_8b',
+        trust_remote_code=True,
+        type='transformers.AutoTokenizer.from_pretrained'),
+    type='xtuner.dataset.LLaVADataset')
+llm_name_or_path = 'internlm/internlm2-chat-1_8b'
+load_from = None
+log_level = 'INFO'
+log_processor = dict(by_epoch=False)
+lr = 0.001
+max_epochs = 1
+max_length = 1472
+max_norm = 1
+model = dict(
+    freeze_llm=True,
+    freeze_visual_encoder=True,
+    llm=dict(
+        pretrained_model_name_or_path='internlm/internlm2-chat-1_8b',
+        quantization_config=dict(
+            bnb_4bit_compute_dtype='torch.float16',
+            bnb_4bit_quant_type='nf4',
+            bnb_4bit_use_double_quant=True,
+            llm_int8_has_fp16_weight=False,
+            llm_int8_threshold=6.0,
+            load_in_4bit=True,
+            load_in_8bit=False,
+            type='transformers.BitsAndBytesConfig'),
+        torch_dtype='torch.float16',
+        trust_remote_code=True,
+        type='transformers.AutoModelForCausalLM.from_pretrained'),
+    type='xtuner.model.LLaVAModel',
+    visual_encoder=dict(
+        pretrained_model_name_or_path='google/siglip-so400m-patch14-384',
+        type='transformers.SiglipVisionModel.from_pretrained'))
+optim_type = 'torch.optim.AdamW'
+optim_wrapper = dict(
+    optimizer=dict(
+        betas=(
+            0.9,
+            0.999,
+        ),
+        lr=0.001,
+        type='torch.optim.AdamW',
+        weight_decay=0),
+    type='DeepSpeedOptimWrapper')
+param_scheduler = [
+    dict(
+        begin=0,
+        by_epoch=True,
+        convert_to_iter_based=True,
+        end=0.03,
+        start_factor=1e-05,
+        type='mmengine.optim.LinearLR'),
+    dict(
+        begin=0.03,
+        by_epoch=True,
+        convert_to_iter_based=True,
+        end=1,
+        eta_min=0.0,
+        type='mmengine.optim.CosineAnnealingLR'),
+]
+prompt_template = 'xtuner.utils.PROMPT_TEMPLATE.internlm2_chat'
+randomness = dict(deterministic=False, seed=None)
+resume = False
+runner_type = 'FlexibleRunner'
+save_steps = 2000
+save_total_limit = 2
+strategy = dict(
+    config=dict(
+        bf16=dict(enabled=True),
+        fp16=dict(enabled=False, initial_scale_power=16),
+        gradient_accumulation_steps='auto',
+        gradient_clipping='auto',
+        train_micro_batch_size_per_gpu='auto',
+        zero_allow_untested_optimizer=True,
+        zero_force_ds_cpu_optimizer=False,
+        zero_optimization=dict(overlap_comm=True, stage=2)),
+    exclude_frozen_parameters=True,
+    gradient_accumulation_steps=4,
+    gradient_clipping=1,
+    train_micro_batch_size_per_gpu=16,
+    type='xtuner.engine.DeepSpeedStrategy')
+tokenizer = dict(
+    padding_side='right',
+    pretrained_model_name_or_path='internlm/internlm2-chat-1_8b',
+    trust_remote_code=True,
+    type='transformers.AutoTokenizer.from_pretrained')
+train_cfg = dict(max_epochs=1, type='xtuner.engine.runner.TrainLoop')
+train_dataloader = dict(
+    batch_size=16,
+    collate_fn=dict(type='xtuner.dataset.collate_fns.default_collate_fn'),
+    dataset=dict(
+        data_path='./LLaVA-Pretrain/blip_laion_cc_sbu_558k.json',
+        dataset_map_fn='xtuner.dataset.map_fns.llava_map_fn',
+        image_folder='./LLaVA-Pretrain/images',
+        image_processor=dict(
+            pretrained_model_name_or_path='google/siglip-so400m-patch14-384',
+            trust_remote_code=True,
+            type='transformers.SiglipImageProcessor.from_pretrained'),
+        max_length=1472,
+        pad_image_to_square=False,
+        template_map_fn=dict(
+            template='xtuner.utils.PROMPT_TEMPLATE.internlm2_chat',
+            type='xtuner.dataset.map_fns.template_map_fn_factory'),
+        tokenizer=dict(
+            padding_side='right',
+            pretrained_model_name_or_path='internlm/internlm2-chat-1_8b',
+            trust_remote_code=True,
+            type='transformers.AutoTokenizer.from_pretrained'),
+        type='xtuner.dataset.LLaVADataset'),
+    num_workers=16,
+    sampler=dict(shuffle=True, type='mmengine.dataset.DefaultSampler'))
+visual_encoder_name_or_path = 'google/siglip-so400m-patch14-384'
+visualizer = dict(
+    type='mmengine.visualization.Visualizer',
+    vis_backends=[
+        dict(type='mmengine.visualization.TensorboardVisBackend'),
+    ])
+warmup_ratio = 0.03
+weight_decay = 0
+work_dir = './work_dirs/pretrain'
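pretrain.py is the mmengine-style config dumped by xtuner for this LLaVA pretraining run: a frozen internlm2-chat-1_8b LLM (loaded 4-bit), a frozen siglip-so400m-patch14-384 visual encoder, and DeepSpeed ZeRO-2 with bf16. As a quick sanity check, a dumped config like this can be loaded and inspected with mmengine; the snippet below is a minimal sketch assuming mmengine is installed and pretrain.py is available locally, not something included in this commit.

from mmengine.config import Config

# Minimal sketch: load the dumped config and inspect a few fields.
cfg = Config.fromfile('pretrain.py')
print(cfg.model.type)                   # 'xtuner.model.LLaVAModel'
print(cfg.train_dataloader.batch_size)  # 16
print(cfg.strategy.type)                # 'xtuner.engine.DeepSpeedStrategy'

With launcher = 'pytorch', the actual run would typically be started through torchrun or xtuner's training CLI; the exact launch command is not part of this commit and should be taken from the xtuner documentation.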