Ubuntu committed on
Commit
d151e03
1 Parent(s): 3d62c34

Added trained v1.6 model

Browse files
README.md ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: llava-hf/llava-v1.6-vicuna-7b-hf
3
+ library_name: peft
4
+ license: other
5
+ tags:
6
+ - llama-factory
7
+ - lora
8
+ - generated_from_trainer
9
+ model-index:
10
+ - name: train_2024-07-03-14-56-50
11
+ results: []
12
+ ---
13
+
14
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
15
+ should probably proofread and complete it, then remove this comment. -->
16
+
17
+ # train_2024-07-03-14-56-50
18
+
19
+ This model is a fine-tuned version of [llava-hf/llava-v1.6-vicuna-7b-hf](https://huggingface.co/llava-hf/llava-v1.6-vicuna-7b-hf) on the llava_med_vi_1k dataset.
20
+
21
+ ## Model description
22
+
23
+ More information needed
24
+
25
+ ## Intended uses & limitations
26
+
27
+ More information needed
28
+
29
+ ## Training and evaluation data
30
+
31
+ More information needed
32
+
33
+ ## Training procedure
34
+
35
+ ### Training hyperparameters
36
+
37
+ The following hyperparameters were used during training:
38
+ - learning_rate: 5e-05
39
+ - train_batch_size: 1
40
+ - eval_batch_size: 8
41
+ - seed: 42
42
+ - gradient_accumulation_steps: 8
43
+ - total_train_batch_size: 8
44
+ - optimizer: AdamW (adamw_torch) with betas=(0.9,0.999) and epsilon=1e-08
45
+ - lr_scheduler_type: cosine
46
+ - num_epochs: 1.0
47
+
48
+ ### Training results
49
+
50
+
51
+
52
+ ### Framework versions
53
+
54
+ - PEFT 0.11.1
55
+ - Transformers 4.42.3
56
+ - PyTorch 2.3.1+cu121
57
+ - Datasets 2.20.0
58
+ - Tokenizers 0.19.1
adapter_config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "llava-hf/llava-v1.6-vicuna-7b-hf",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
+ "inference_mode": true,
8
+ "init_lora_weights": true,
9
+ "layer_replication": null,
10
+ "layers_pattern": null,
11
+ "layers_to_transform": null,
12
+ "loftq_config": {},
13
+ "lora_alpha": 16,
14
+ "lora_dropout": 0,
15
+ "megatron_config": null,
16
+ "megatron_core": "megatron.core",
17
+ "modules_to_save": null,
18
+ "peft_type": "LORA",
19
+ "r": 2,
20
+ "rank_pattern": {},
21
+ "revision": null,
22
+ "target_modules": "^(?!.*vision_tower).*(?:v_proj|q_proj|up_proj|linear_2|o_proj|down_proj|k_proj|gate_proj|linear_1).*",
23
+ "task_type": "CAUSAL_LM",
24
+ "use_dora": false,
25
+ "use_rslora": false
26
+ }
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8528f8a3c439bf3d5cf3728c1e44517afe89946f95b6fc6f26b8440726ce9254
3
+ size 20161056
added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "<image>": 32000
3
+ }
all_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 0.96,
3
+ "num_input_tokens_seen": 45448,
4
+ "total_flos": 1891508470824960.0,
5
+ "train_loss": 1.1797900597254436,
6
+ "train_runtime": 413.707,
7
+ "train_samples_per_second": 0.242,
8
+ "train_steps_per_second": 0.029
9
+ }
checkpoint-12/README.md ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: llava-hf/llava-v1.6-vicuna-7b-hf
3
+ library_name: peft
4
+ ---
5
+
6
+ # Model Card for Model ID
7
+
8
+ <!-- Provide a quick summary of what the model is/does. -->
9
+
10
+
11
+
12
+ ## Model Details
13
+
14
+ ### Model Description
15
+
16
+ <!-- Provide a longer summary of what this model is. -->
17
+
18
+
19
+
20
+ - **Developed by:** [More Information Needed]
21
+ - **Funded by [optional]:** [More Information Needed]
22
+ - **Shared by [optional]:** [More Information Needed]
23
+ - **Model type:** [More Information Needed]
24
+ - **Language(s) (NLP):** [More Information Needed]
25
+ - **License:** [More Information Needed]
26
+ - **Finetuned from model [optional]:** [More Information Needed]
27
+
28
+ ### Model Sources [optional]
29
+
30
+ <!-- Provide the basic links for the model. -->
31
+
32
+ - **Repository:** [More Information Needed]
33
+ - **Paper [optional]:** [More Information Needed]
34
+ - **Demo [optional]:** [More Information Needed]
35
+
36
+ ## Uses
37
+
38
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
39
+
40
+ ### Direct Use
41
+
42
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
43
+
44
+ [More Information Needed]
45
+
46
+ ### Downstream Use [optional]
47
+
48
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
49
+
50
+ [More Information Needed]
51
+
52
+ ### Out-of-Scope Use
53
+
54
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
55
+
56
+ [More Information Needed]
57
+
58
+ ## Bias, Risks, and Limitations
59
+
60
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
61
+
62
+ [More Information Needed]
63
+
64
+ ### Recommendations
65
+
66
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
67
+
68
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
69
+
70
+ ## How to Get Started with the Model
71
+
72
+ Use the code below to get started with the model.
73
+
74
+ [More Information Needed]
75
+
76
+ ## Training Details
77
+
78
+ ### Training Data
79
+
80
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
81
+
82
+ [More Information Needed]
83
+
84
+ ### Training Procedure
85
+
86
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
87
+
88
+ #### Preprocessing [optional]
89
+
90
+ [More Information Needed]
91
+
92
+
93
+ #### Training Hyperparameters
94
+
95
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
96
+
97
+ #### Speeds, Sizes, Times [optional]
98
+
99
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
100
+
101
+ [More Information Needed]
102
+
103
+ ## Evaluation
104
+
105
+ <!-- This section describes the evaluation protocols and provides the results. -->
106
+
107
+ ### Testing Data, Factors & Metrics
108
+
109
+ #### Testing Data
110
+
111
+ <!-- This should link to a Dataset Card if possible. -->
112
+
113
+ [More Information Needed]
114
+
115
+ #### Factors
116
+
117
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
118
+
119
+ [More Information Needed]
120
+
121
+ #### Metrics
122
+
123
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
124
+
125
+ [More Information Needed]
126
+
127
+ ### Results
128
+
129
+ [More Information Needed]
130
+
131
+ #### Summary
132
+
133
+
134
+
135
+ ## Model Examination [optional]
136
+
137
+ <!-- Relevant interpretability work for the model goes here -->
138
+
139
+ [More Information Needed]
140
+
141
+ ## Environmental Impact
142
+
143
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
144
+
145
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
146
+
147
+ - **Hardware Type:** [More Information Needed]
148
+ - **Hours used:** [More Information Needed]
149
+ - **Cloud Provider:** [More Information Needed]
150
+ - **Compute Region:** [More Information Needed]
151
+ - **Carbon Emitted:** [More Information Needed]
152
+
153
+ ## Technical Specifications [optional]
154
+
155
+ ### Model Architecture and Objective
156
+
157
+ [More Information Needed]
158
+
159
+ ### Compute Infrastructure
160
+
161
+ [More Information Needed]
162
+
163
+ #### Hardware
164
+
165
+ [More Information Needed]
166
+
167
+ #### Software
168
+
169
+ [More Information Needed]
170
+
171
+ ## Citation [optional]
172
+
173
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
174
+
175
+ **BibTeX:**
176
+
177
+ [More Information Needed]
178
+
179
+ **APA:**
180
+
181
+ [More Information Needed]
182
+
183
+ ## Glossary [optional]
184
+
185
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
186
+
187
+ [More Information Needed]
188
+
189
+ ## More Information [optional]
190
+
191
+ [More Information Needed]
192
+
193
+ ## Model Card Authors [optional]
194
+
195
+ [More Information Needed]
196
+
197
+ ## Model Card Contact
198
+
199
+ [More Information Needed]
200
+ ### Framework versions
201
+
202
+ - PEFT 0.11.1
checkpoint-12/adapter_config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "llava-hf/llava-v1.6-vicuna-7b-hf",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
+ "inference_mode": true,
8
+ "init_lora_weights": true,
9
+ "layer_replication": null,
10
+ "layers_pattern": null,
11
+ "layers_to_transform": null,
12
+ "loftq_config": {},
13
+ "lora_alpha": 16,
14
+ "lora_dropout": 0,
15
+ "megatron_config": null,
16
+ "megatron_core": "megatron.core",
17
+ "modules_to_save": null,
18
+ "peft_type": "LORA",
19
+ "r": 2,
20
+ "rank_pattern": {},
21
+ "revision": null,
22
+ "target_modules": "^(?!.*vision_tower).*(?:v_proj|q_proj|up_proj|linear_2|o_proj|down_proj|k_proj|gate_proj|linear_1).*",
23
+ "task_type": "CAUSAL_LM",
24
+ "use_dora": false,
25
+ "use_rslora": false
26
+ }
checkpoint-12/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8528f8a3c439bf3d5cf3728c1e44517afe89946f95b6fc6f26b8440726ce9254
3
+ size 20161056
checkpoint-12/added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "<image>": 32000
3
+ }
checkpoint-12/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fbbf62ecff6baf846ef88da70a980ae92bc073e2be3e572ce2dbdca7b2837260
3
+ size 40569886
checkpoint-12/preprocessor_config.json ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "aspect_ratio_setting": "anyres",
3
+ "crop_size": {
4
+ "height": 336,
5
+ "width": 336
6
+ },
7
+ "do_center_crop": true,
8
+ "do_convert_rgb": true,
9
+ "do_normalize": true,
10
+ "do_pad": true,
11
+ "do_rescale": true,
12
+ "do_resize": true,
13
+ "image_grid_pinpoints": [
14
+ [
15
+ 336,
16
+ 672
17
+ ],
18
+ [
19
+ 672,
20
+ 336
21
+ ],
22
+ [
23
+ 672,
24
+ 672
25
+ ],
26
+ [
27
+ 1008,
28
+ 336
29
+ ],
30
+ [
31
+ 336,
32
+ 1008
33
+ ]
34
+ ],
35
+ "image_mean": [
36
+ 0.48145466,
37
+ 0.4578275,
38
+ 0.40821073
39
+ ],
40
+ "image_processor_type": "LlavaNextImageProcessor",
41
+ "image_std": [
42
+ 0.26862954,
43
+ 0.26130258,
44
+ 0.27577711
45
+ ],
46
+ "processor_class": "LlavaNextProcessor",
47
+ "resample": 3,
48
+ "rescale_factor": 0.00392156862745098,
49
+ "size": {
50
+ "shortest_edge": 336
51
+ }
52
+ }
checkpoint-12/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:386fcc8cc1089aade9450d86fb239ea3483f455fd2d78d8378645feecfec9d69
3
+ size 14244
checkpoint-12/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13683e771480c96e7fd48517638a56b23a2ce1b08710abfc9987642d98d1f2da
3
+ size 1064
checkpoint-12/special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "<unk>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "unk_token": {
24
+ "content": "<unk>",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ }
30
+ }
checkpoint-12/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-12/tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
3
+ size 499723
checkpoint-12/tokenizer_config.json ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "add_prefix_space": true,
5
+ "added_tokens_decoder": {
6
+ "0": {
7
+ "content": "<unk>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false,
12
+ "special": true
13
+ },
14
+ "1": {
15
+ "content": "<s>",
16
+ "lstrip": false,
17
+ "normalized": false,
18
+ "rstrip": false,
19
+ "single_word": false,
20
+ "special": true
21
+ },
22
+ "2": {
23
+ "content": "</s>",
24
+ "lstrip": false,
25
+ "normalized": false,
26
+ "rstrip": false,
27
+ "single_word": false,
28
+ "special": true
29
+ },
30
+ "32000": {
31
+ "content": "<image>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false,
36
+ "special": true
37
+ }
38
+ },
39
+ "bos_token": "<s>",
40
+ "chat_template": "{% set system_message = 'A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user\\'s questions.' %}{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% if system_message is defined %}{{ system_message }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ 'USER: ' + content + ' ASSISTANT:' }}{% elif message['role'] == 'assistant' %}{{ content + '</s>' }}{% endif %}{% endfor %}",
41
+ "clean_up_tokenization_spaces": false,
42
+ "eos_token": "</s>",
43
+ "legacy": false,
44
+ "model_max_length": 4096,
45
+ "pad_token": "<unk>",
46
+ "padding_side": "right",
47
+ "processor_class": "LlavaNextProcessor",
48
+ "sp_model_kwargs": {},
49
+ "spaces_between_special_tokens": false,
50
+ "split_special_tokens": false,
51
+ "tokenizer_class": "LlamaTokenizer",
52
+ "unk_token": "<unk>",
53
+ "use_default_system_prompt": false
54
+ }
checkpoint-12/trainer_state.json ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.96,
5
+ "eval_steps": 500,
6
+ "global_step": 12,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.4,
13
+ "grad_norm": 0.5919607281684875,
14
+ "learning_rate": 3.147047612756302e-05,
15
+ "loss": 1.1881,
16
+ "num_input_tokens_seen": 18416,
17
+ "step": 5
18
+ },
19
+ {
20
+ "epoch": 0.8,
21
+ "grad_norm": 0.6693010330200195,
22
+ "learning_rate": 3.3493649053890326e-06,
23
+ "loss": 1.1695,
24
+ "num_input_tokens_seen": 37728,
25
+ "step": 10
26
+ }
27
+ ],
28
+ "logging_steps": 5,
29
+ "max_steps": 12,
30
+ "num_input_tokens_seen": 45448,
31
+ "num_train_epochs": 1,
32
+ "save_steps": 100,
33
+ "stateful_callbacks": {
34
+ "TrainerControl": {
35
+ "args": {
36
+ "should_epoch_stop": false,
37
+ "should_evaluate": false,
38
+ "should_log": false,
39
+ "should_save": true,
40
+ "should_training_stop": true
41
+ },
42
+ "attributes": {}
43
+ }
44
+ },
45
+ "total_flos": 1891508470824960.0,
46
+ "train_batch_size": 1,
47
+ "trial_name": null,
48
+ "trial_params": null
49
+ }
checkpoint-12/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9bb6d614e92b929824c2bbf421fb2e15f24bf898acfacddc3fd85a7ed9a4ac91
3
+ size 5304
llamaboard_config.yaml ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ top.booster: unsloth
2
+ top.checkpoint_path: []
3
+ top.finetuning_type: lora
4
+ top.model_name: LLaVA1.5-7B-Chat
5
+ top.quantization_bit: none
6
+ top.rope_scaling: none
7
+ top.template: vicuna
8
+ top.visual_inputs: true
9
+ train.additional_target: ''
10
+ train.badam_mode: layer
11
+ train.badam_switch_interval: 50
12
+ train.badam_switch_mode: ascending
13
+ train.badam_update_ratio: 0.05
14
+ train.batch_size: 1
15
+ train.compute_type: bf16
16
+ train.create_new_adapter: false
17
+ train.cutoff_len: 512
18
+ train.dataset:
19
+ - llava_med_vi_1k
20
+ train.dataset_dir: data
21
+ train.ds_offload: false
22
+ train.ds_stage: none
23
+ train.freeze_extra_modules: ''
24
+ train.freeze_trainable_layers: 2
25
+ train.freeze_trainable_modules: all
26
+ train.galore_rank: 16
27
+ train.galore_scale: 0.25
28
+ train.galore_target: all
29
+ train.galore_update_interval: 200
30
+ train.gradient_accumulation_steps: 8
31
+ train.learning_rate: 5e-5
32
+ train.logging_steps: 5
33
+ train.lora_alpha: 16
34
+ train.lora_dropout: 0
35
+ train.lora_rank: 2
36
+ train.lora_target: ''
37
+ train.loraplus_lr_ratio: 0
38
+ train.lr_scheduler_type: cosine
39
+ train.max_grad_norm: '1.0'
40
+ train.max_samples: '100'
41
+ train.neftune_alpha: 0
42
+ train.num_train_epochs: '1.0'
43
+ train.optim: adamw_torch
44
+ train.packing: false
45
+ train.ppo_score_norm: false
46
+ train.ppo_whiten_rewards: false
47
+ train.pref_beta: 0.1
48
+ train.pref_ftx: 0
49
+ train.pref_loss: sigmoid
50
+ train.report_to: false
51
+ train.resize_vocab: false
52
+ train.reward_model: null
53
+ train.save_steps: 100
54
+ train.shift_attn: false
55
+ train.training_stage: Supervised Fine-Tuning
56
+ train.upcast_layernorm: false
57
+ train.use_badam: false
58
+ train.use_dora: false
59
+ train.use_galore: false
60
+ train.use_llama_pro: false
61
+ train.use_pissa: false
62
+ train.use_rslora: false
63
+ train.val_size: 0
64
+ train.warmup_steps: 0
preprocessor_config.json ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "aspect_ratio_setting": "anyres",
3
+ "crop_size": {
4
+ "height": 336,
5
+ "width": 336
6
+ },
7
+ "do_center_crop": true,
8
+ "do_convert_rgb": true,
9
+ "do_normalize": true,
10
+ "do_pad": true,
11
+ "do_rescale": true,
12
+ "do_resize": true,
13
+ "image_grid_pinpoints": [
14
+ [
15
+ 336,
16
+ 672
17
+ ],
18
+ [
19
+ 672,
20
+ 336
21
+ ],
22
+ [
23
+ 672,
24
+ 672
25
+ ],
26
+ [
27
+ 1008,
28
+ 336
29
+ ],
30
+ [
31
+ 336,
32
+ 1008
33
+ ]
34
+ ],
35
+ "image_mean": [
36
+ 0.48145466,
37
+ 0.4578275,
38
+ 0.40821073
39
+ ],
40
+ "image_processor_type": "LlavaNextImageProcessor",
41
+ "image_std": [
42
+ 0.26862954,
43
+ 0.26130258,
44
+ 0.27577711
45
+ ],
46
+ "processor_class": "LlavaNextProcessor",
47
+ "resample": 3,
48
+ "rescale_factor": 0.00392156862745098,
49
+ "size": {
50
+ "shortest_edge": 336
51
+ }
52
+ }
running_log.txt ADDED
@@ -0,0 +1,495 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 07/03/2024 15:04:16 - INFO - transformers.tokenization_utils_base - loading file tokenizer.model from cache at /home/ubuntu/.cache/huggingface/hub/models--llava-hf--llava-v1.6-vicuna-7b-hf/snapshots/382e15404d46a627a15b299a41618e2efb04f2a7/tokenizer.model
2
+
3
+ 07/03/2024 15:04:16 - INFO - transformers.tokenization_utils_base - loading file tokenizer.json from cache at /home/ubuntu/.cache/huggingface/hub/models--llava-hf--llava-v1.6-vicuna-7b-hf/snapshots/382e15404d46a627a15b299a41618e2efb04f2a7/tokenizer.json
4
+
5
+ 07/03/2024 15:04:16 - INFO - transformers.tokenization_utils_base - loading file added_tokens.json from cache at /home/ubuntu/.cache/huggingface/hub/models--llava-hf--llava-v1.6-vicuna-7b-hf/snapshots/382e15404d46a627a15b299a41618e2efb04f2a7/added_tokens.json
6
+
7
+ 07/03/2024 15:04:16 - INFO - transformers.tokenization_utils_base - loading file special_tokens_map.json from cache at /home/ubuntu/.cache/huggingface/hub/models--llava-hf--llava-v1.6-vicuna-7b-hf/snapshots/382e15404d46a627a15b299a41618e2efb04f2a7/special_tokens_map.json
8
+
9
+ 07/03/2024 15:04:16 - INFO - transformers.tokenization_utils_base - loading file tokenizer_config.json from cache at /home/ubuntu/.cache/huggingface/hub/models--llava-hf--llava-v1.6-vicuna-7b-hf/snapshots/382e15404d46a627a15b299a41618e2efb04f2a7/tokenizer_config.json
10
+
11
+ 07/03/2024 15:04:16 - INFO - transformers.image_processing_base - loading configuration file preprocessor_config.json from cache at /home/ubuntu/.cache/huggingface/hub/models--llava-hf--llava-v1.6-vicuna-7b-hf/snapshots/382e15404d46a627a15b299a41618e2efb04f2a7/preprocessor_config.json
12
+
13
+ 07/03/2024 15:04:16 - INFO - transformers.image_processing_base - loading configuration file preprocessor_config.json from cache at /home/ubuntu/.cache/huggingface/hub/models--llava-hf--llava-v1.6-vicuna-7b-hf/snapshots/382e15404d46a627a15b299a41618e2efb04f2a7/preprocessor_config.json
14
+
15
+ 07/03/2024 15:04:16 - INFO - transformers.image_processing_base - Image processor LlavaNextImageProcessor {
16
+ "aspect_ratio_setting": "anyres",
17
+ "crop_size": {
18
+ "height": 336,
19
+ "width": 336
20
+ },
21
+ "do_center_crop": true,
22
+ "do_convert_rgb": true,
23
+ "do_normalize": true,
24
+ "do_pad": true,
25
+ "do_rescale": true,
26
+ "do_resize": true,
27
+ "image_grid_pinpoints": [
28
+ [
29
+ 336,
30
+ 672
31
+ ],
32
+ [
33
+ 672,
34
+ 336
35
+ ],
36
+ [
37
+ 672,
38
+ 672
39
+ ],
40
+ [
41
+ 1008,
42
+ 336
43
+ ],
44
+ [
45
+ 336,
46
+ 1008
47
+ ]
48
+ ],
49
+ "image_mean": [
50
+ 0.48145466,
51
+ 0.4578275,
52
+ 0.40821073
53
+ ],
54
+ "image_processor_type": "LlavaNextImageProcessor",
55
+ "image_std": [
56
+ 0.26862954,
57
+ 0.26130258,
58
+ 0.27577711
59
+ ],
60
+ "processor_class": "LlavaNextProcessor",
61
+ "resample": 3,
62
+ "rescale_factor": 0.00392156862745098,
63
+ "size": {
64
+ "shortest_edge": 336
65
+ }
66
+ }
67
+
68
+
69
+ 07/03/2024 15:04:16 - INFO - transformers.tokenization_utils_base - loading file tokenizer.model from cache at /home/ubuntu/.cache/huggingface/hub/models--llava-hf--llava-v1.6-vicuna-7b-hf/snapshots/382e15404d46a627a15b299a41618e2efb04f2a7/tokenizer.model
70
+
71
+ 07/03/2024 15:04:16 - INFO - transformers.tokenization_utils_base - loading file tokenizer.json from cache at /home/ubuntu/.cache/huggingface/hub/models--llava-hf--llava-v1.6-vicuna-7b-hf/snapshots/382e15404d46a627a15b299a41618e2efb04f2a7/tokenizer.json
72
+
73
+ 07/03/2024 15:04:16 - INFO - transformers.tokenization_utils_base - loading file added_tokens.json from cache at /home/ubuntu/.cache/huggingface/hub/models--llava-hf--llava-v1.6-vicuna-7b-hf/snapshots/382e15404d46a627a15b299a41618e2efb04f2a7/added_tokens.json
74
+
75
+ 07/03/2024 15:04:16 - INFO - transformers.tokenization_utils_base - loading file special_tokens_map.json from cache at /home/ubuntu/.cache/huggingface/hub/models--llava-hf--llava-v1.6-vicuna-7b-hf/snapshots/382e15404d46a627a15b299a41618e2efb04f2a7/special_tokens_map.json
76
+
77
+ 07/03/2024 15:04:16 - INFO - transformers.tokenization_utils_base - loading file tokenizer_config.json from cache at /home/ubuntu/.cache/huggingface/hub/models--llava-hf--llava-v1.6-vicuna-7b-hf/snapshots/382e15404d46a627a15b299a41618e2efb04f2a7/tokenizer_config.json
78
+
79
+ 07/03/2024 15:04:16 - INFO - transformers.processing_utils - Processor LlavaNextProcessor:
80
+ - image_processor: LlavaNextImageProcessor {
81
+ "aspect_ratio_setting": "anyres",
82
+ "crop_size": {
83
+ "height": 336,
84
+ "width": 336
85
+ },
86
+ "do_center_crop": true,
87
+ "do_convert_rgb": true,
88
+ "do_normalize": true,
89
+ "do_pad": true,
90
+ "do_rescale": true,
91
+ "do_resize": true,
92
+ "image_grid_pinpoints": [
93
+ [
94
+ 336,
95
+ 672
96
+ ],
97
+ [
98
+ 672,
99
+ 336
100
+ ],
101
+ [
102
+ 672,
103
+ 672
104
+ ],
105
+ [
106
+ 1008,
107
+ 336
108
+ ],
109
+ [
110
+ 336,
111
+ 1008
112
+ ]
113
+ ],
114
+ "image_mean": [
115
+ 0.48145466,
116
+ 0.4578275,
117
+ 0.40821073
118
+ ],
119
+ "image_processor_type": "LlavaNextImageProcessor",
120
+ "image_std": [
121
+ 0.26862954,
122
+ 0.26130258,
123
+ 0.27577711
124
+ ],
125
+ "processor_class": "LlavaNextProcessor",
126
+ "resample": 3,
127
+ "rescale_factor": 0.00392156862745098,
128
+ "size": {
129
+ "shortest_edge": 336
130
+ }
131
+ }
132
+
133
+ - tokenizer: LlamaTokenizerFast(name_or_path='llava-hf/llava-v1.6-vicuna-7b-hf', vocab_size=32000, model_max_length=4096, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'bos_token': '<s>', 'eos_token': '</s>', 'unk_token': '<unk>', 'pad_token': '<unk>'}, clean_up_tokenization_spaces=False), added_tokens_decoder={
134
+ 0: AddedToken("<unk>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
135
+ 1: AddedToken("<s>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
136
+ 2: AddedToken("</s>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
137
+ 32000: AddedToken("<image>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
138
+ }
139
+
140
+ {
141
+ "chat_template": null,
142
+ "processor_class": "LlavaNextProcessor"
143
+ }
144
+
145
+
146
+ 07/03/2024 15:04:16 - INFO - llamafactory.data.loader - Loading dataset tminh/test-llava4...
147
+
148
+ 07/03/2024 15:04:24 - INFO - transformers.configuration_utils - loading configuration file config.json from cache at /home/ubuntu/.cache/huggingface/hub/models--llava-hf--llava-v1.6-vicuna-7b-hf/snapshots/382e15404d46a627a15b299a41618e2efb04f2a7/config.json
149
+
150
+ 07/03/2024 15:04:24 - INFO - transformers.configuration_utils - Model config LlavaNextConfig {
151
+ "_name_or_path": "llava-hf/llava-v1.6-vicuna-7b-hf",
152
+ "architectures": [
153
+ "LlavaNextForConditionalGeneration"
154
+ ],
155
+ "ignore_index": -100,
156
+ "image_grid_pinpoints": [
157
+ [
158
+ 336,
159
+ 672
160
+ ],
161
+ [
162
+ 672,
163
+ 336
164
+ ],
165
+ [
166
+ 672,
167
+ 672
168
+ ],
169
+ [
170
+ 1008,
171
+ 336
172
+ ],
173
+ [
174
+ 336,
175
+ 1008
176
+ ]
177
+ ],
178
+ "image_token_index": 32000,
179
+ "model_type": "llava_next",
180
+ "projector_hidden_act": "gelu",
181
+ "text_config": {
182
+ "_name_or_path": "lmsys/vicuna-7b-v1.5",
183
+ "architectures": [
184
+ "LlamaForCausalLM"
185
+ ],
186
+ "max_position_embeddings": 4096,
187
+ "model_type": "llama",
188
+ "pad_token_id": 0,
189
+ "rms_norm_eps": 1e-05,
190
+ "torch_dtype": "float16",
191
+ "vocab_size": 32064
192
+ },
193
+ "tie_word_embeddings": false,
194
+ "torch_dtype": "float16",
195
+ "transformers_version": "4.42.3",
196
+ "use_image_newline_parameter": true,
197
+ "vision_config": {
198
+ "hidden_size": 1024,
199
+ "image_size": 336,
200
+ "intermediate_size": 4096,
201
+ "model_type": "clip_vision_model",
202
+ "num_attention_heads": 16,
203
+ "num_hidden_layers": 24,
204
+ "patch_size": 14,
205
+ "projection_dim": 768,
206
+ "vocab_size": 32000
207
+ },
208
+ "vision_feature_layer": -2,
209
+ "vision_feature_select_strategy": "default",
210
+ "vocab_size": 32064
211
+ }
212
+
213
+
214
+ 07/03/2024 15:04:25 - INFO - transformers.configuration_utils - loading configuration file config.json from cache at /home/ubuntu/.cache/huggingface/hub/models--llava-hf--llava-v1.6-vicuna-7b-hf/snapshots/382e15404d46a627a15b299a41618e2efb04f2a7/config.json
215
+
216
+ 07/03/2024 15:04:25 - INFO - transformers.configuration_utils - Model config LlavaNextConfig {
217
+ "_name_or_path": "llava-hf/llava-v1.6-vicuna-7b-hf",
218
+ "architectures": [
219
+ "LlavaNextForConditionalGeneration"
220
+ ],
221
+ "ignore_index": -100,
222
+ "image_grid_pinpoints": [
223
+ [
224
+ 336,
225
+ 672
226
+ ],
227
+ [
228
+ 672,
229
+ 336
230
+ ],
231
+ [
232
+ 672,
233
+ 672
234
+ ],
235
+ [
236
+ 1008,
237
+ 336
238
+ ],
239
+ [
240
+ 336,
241
+ 1008
242
+ ]
243
+ ],
244
+ "image_token_index": 32000,
245
+ "model_type": "llava_next",
246
+ "projector_hidden_act": "gelu",
247
+ "text_config": {
248
+ "_name_or_path": "lmsys/vicuna-7b-v1.5",
249
+ "architectures": [
250
+ "LlamaForCausalLM"
251
+ ],
252
+ "max_position_embeddings": 4096,
253
+ "model_type": "llama",
254
+ "pad_token_id": 0,
255
+ "rms_norm_eps": 1e-05,
256
+ "torch_dtype": "float16",
257
+ "vocab_size": 32064
258
+ },
259
+ "tie_word_embeddings": false,
260
+ "torch_dtype": "float16",
261
+ "transformers_version": "4.42.3",
262
+ "use_image_newline_parameter": true,
263
+ "vision_config": {
264
+ "hidden_size": 1024,
265
+ "image_size": 336,
266
+ "intermediate_size": 4096,
267
+ "model_type": "clip_vision_model",
268
+ "num_attention_heads": 16,
269
+ "num_hidden_layers": 24,
270
+ "patch_size": 14,
271
+ "projection_dim": 768,
272
+ "vocab_size": 32000
273
+ },
274
+ "vision_feature_layer": -2,
275
+ "vision_feature_select_strategy": "default",
276
+ "vocab_size": 32064
277
+ }
278
+
279
+
280
+ 07/03/2024 15:04:25 - WARNING - llamafactory.model.model_utils.unsloth - Unsloth does not support model type llava_next.
281
+
282
+ 07/03/2024 15:04:25 - INFO - transformers.modeling_utils - loading weights file model.safetensors from cache at /home/ubuntu/.cache/huggingface/hub/models--llava-hf--llava-v1.6-vicuna-7b-hf/snapshots/382e15404d46a627a15b299a41618e2efb04f2a7/model.safetensors.index.json
283
+
284
+ 07/03/2024 15:05:57 - INFO - transformers.modeling_utils - Instantiating LlavaNextForConditionalGeneration model under default dtype torch.bfloat16.
285
+
286
+ 07/03/2024 15:05:57 - INFO - transformers.generation.configuration_utils - Generate config GenerationConfig {}
287
+
288
+
289
+ 07/03/2024 15:05:58 - INFO - transformers.generation.configuration_utils - Generate config GenerationConfig {
290
+ "bos_token_id": 1,
291
+ "eos_token_id": 2,
292
+ "pad_token_id": 0
293
+ }
294
+
295
+
296
+ 07/03/2024 15:08:02 - INFO - transformers.modeling_utils - All model checkpoint weights were used when initializing LlavaNextForConditionalGeneration.
297
+
298
+
299
+ 07/03/2024 15:08:02 - INFO - transformers.modeling_utils - All the weights of LlavaNextForConditionalGeneration were initialized from the model checkpoint at llava-hf/llava-v1.6-vicuna-7b-hf.
300
+ If your task is similar to the task the model of the checkpoint was trained on, you can already use LlavaNextForConditionalGeneration for predictions without further training.
301
+
302
+ 07/03/2024 15:08:02 - INFO - transformers.generation.configuration_utils - loading configuration file generation_config.json from cache at /home/ubuntu/.cache/huggingface/hub/models--llava-hf--llava-v1.6-vicuna-7b-hf/snapshots/382e15404d46a627a15b299a41618e2efb04f2a7/generation_config.json
303
+
304
+ 07/03/2024 15:08:02 - INFO - transformers.generation.configuration_utils - Generate config GenerationConfig {
305
+ "bos_token_id": 1,
306
+ "eos_token_id": 2,
307
+ "pad_token_id": 0
308
+ }
309
+
310
+
311
+ 07/03/2024 15:08:02 - INFO - llamafactory.model.model_utils.checkpointing - Gradient checkpointing enabled.
312
+
313
+ 07/03/2024 15:08:02 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.
314
+
315
+ 07/03/2024 15:08:02 - INFO - llamafactory.model.adapter - Upcasting trainable params to float32.
316
+
317
+ 07/03/2024 15:08:02 - INFO - llamafactory.model.adapter - Fine-tuning method: LoRA
318
+
319
+ 07/03/2024 15:08:02 - INFO - llamafactory.model.model_utils.misc - Found linear modules: v_proj,q_proj,up_proj,linear_2,o_proj,down_proj,k_proj,gate_proj,linear_1
320
+
321
+ 07/03/2024 15:08:02 - INFO - llamafactory.model.loader - trainable params: 5023744 || all params: 7068454912 || trainable%: 0.0711
322
+
323
+ 07/03/2024 15:08:02 - INFO - transformers.trainer - Using auto half precision backend
324
+
325
+ 07/03/2024 15:08:02 - INFO - transformers.trainer - ***** Running training *****
326
+
327
+ 07/03/2024 15:08:02 - INFO - transformers.trainer - Num examples = 100
328
+
329
+ 07/03/2024 15:08:02 - INFO - transformers.trainer - Num Epochs = 1
330
+
331
+ 07/03/2024 15:08:02 - INFO - transformers.trainer - Instantaneous batch size per device = 1
332
+
333
+ 07/03/2024 15:08:02 - INFO - transformers.trainer - Total train batch size (w. parallel, distributed & accumulation) = 8
334
+
335
+ 07/03/2024 15:08:02 - INFO - transformers.trainer - Gradient Accumulation steps = 8
336
+
337
+ 07/03/2024 15:08:02 - INFO - transformers.trainer - Total optimization steps = 12
338
+
339
+ 07/03/2024 15:08:02 - INFO - transformers.trainer - Number of trainable parameters = 5,023,744
340
+
341
+ 07/03/2024 15:08:04 - WARNING - transformers.models.llama.modeling_llama - `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
342
+
343
+ 07/03/2024 15:10:44 - INFO - llamafactory.extras.callbacks - {'loss': 1.1881, 'learning_rate': 3.1470e-05, 'epoch': 0.40, 'throughput': 113.65}
344
+
345
+ 07/03/2024 15:13:42 - INFO - llamafactory.extras.callbacks - {'loss': 1.1695, 'learning_rate': 3.3494e-06, 'epoch': 0.80, 'throughput': 111.00}
346
+
347
+ 07/03/2024 15:14:56 - INFO - transformers.trainer - Saving model checkpoint to saves/LLaVA1.5-7B-Chat/lora/train_2024-07-03-14-56-50/checkpoint-12
348
+
349
+ 07/03/2024 15:14:56 - INFO - transformers.configuration_utils - loading configuration file config.json from cache at /home/ubuntu/.cache/huggingface/hub/models--llava-hf--llava-v1.6-vicuna-7b-hf/snapshots/382e15404d46a627a15b299a41618e2efb04f2a7/config.json
350
+
351
+ 07/03/2024 15:14:56 - INFO - transformers.configuration_utils - Model config LlavaNextConfig {
352
+ "architectures": [
353
+ "LlavaNextForConditionalGeneration"
354
+ ],
355
+ "ignore_index": -100,
356
+ "image_grid_pinpoints": [
357
+ [
358
+ 336,
359
+ 672
360
+ ],
361
+ [
362
+ 672,
363
+ 336
364
+ ],
365
+ [
366
+ 672,
367
+ 672
368
+ ],
369
+ [
370
+ 1008,
371
+ 336
372
+ ],
373
+ [
374
+ 336,
375
+ 1008
376
+ ]
377
+ ],
378
+ "image_token_index": 32000,
379
+ "model_type": "llava_next",
380
+ "projector_hidden_act": "gelu",
381
+ "text_config": {
382
+ "_name_or_path": "lmsys/vicuna-7b-v1.5",
383
+ "architectures": [
384
+ "LlamaForCausalLM"
385
+ ],
386
+ "max_position_embeddings": 4096,
387
+ "model_type": "llama",
388
+ "pad_token_id": 0,
389
+ "rms_norm_eps": 1e-05,
390
+ "torch_dtype": "float16",
391
+ "vocab_size": 32064
392
+ },
393
+ "tie_word_embeddings": false,
394
+ "torch_dtype": "float16",
395
+ "transformers_version": "4.42.3",
396
+ "use_image_newline_parameter": true,
397
+ "vision_config": {
398
+ "hidden_size": 1024,
399
+ "image_size": 336,
400
+ "intermediate_size": 4096,
401
+ "model_type": "clip_vision_model",
402
+ "num_attention_heads": 16,
403
+ "num_hidden_layers": 24,
404
+ "patch_size": 14,
405
+ "projection_dim": 768,
406
+ "vocab_size": 32000
407
+ },
408
+ "vision_feature_layer": -2,
409
+ "vision_feature_select_strategy": "default",
410
+ "vocab_size": 32064
411
+ }
412
+
413
+
414
+ 07/03/2024 15:14:56 - INFO - transformers.image_processing_base - Image processor saved in saves/LLaVA1.5-7B-Chat/lora/train_2024-07-03-14-56-50/checkpoint-12/preprocessor_config.json
415
+
416
+ 07/03/2024 15:14:56 - INFO - transformers.trainer -
417
+
418
+ Training completed. Do not forget to share your model on huggingface.co/models =)
419
+
420
+
421
+
422
+ 07/03/2024 15:14:56 - INFO - transformers.trainer - Saving model checkpoint to saves/LLaVA1.5-7B-Chat/lora/train_2024-07-03-14-56-50
423
+
424
+ 07/03/2024 15:14:56 - INFO - transformers.configuration_utils - loading configuration file config.json from cache at /home/ubuntu/.cache/huggingface/hub/models--llava-hf--llava-v1.6-vicuna-7b-hf/snapshots/382e15404d46a627a15b299a41618e2efb04f2a7/config.json
425
+
426
+ 07/03/2024 15:14:56 - INFO - transformers.configuration_utils - Model config LlavaNextConfig {
427
+ "architectures": [
428
+ "LlavaNextForConditionalGeneration"
429
+ ],
430
+ "ignore_index": -100,
431
+ "image_grid_pinpoints": [
432
+ [
433
+ 336,
434
+ 672
435
+ ],
436
+ [
437
+ 672,
438
+ 336
439
+ ],
440
+ [
441
+ 672,
442
+ 672
443
+ ],
444
+ [
445
+ 1008,
446
+ 336
447
+ ],
448
+ [
449
+ 336,
450
+ 1008
451
+ ]
452
+ ],
453
+ "image_token_index": 32000,
454
+ "model_type": "llava_next",
455
+ "projector_hidden_act": "gelu",
456
+ "text_config": {
457
+ "_name_or_path": "lmsys/vicuna-7b-v1.5",
458
+ "architectures": [
459
+ "LlamaForCausalLM"
460
+ ],
461
+ "max_position_embeddings": 4096,
462
+ "model_type": "llama",
463
+ "pad_token_id": 0,
464
+ "rms_norm_eps": 1e-05,
465
+ "torch_dtype": "float16",
466
+ "vocab_size": 32064
467
+ },
468
+ "tie_word_embeddings": false,
469
+ "torch_dtype": "float16",
470
+ "transformers_version": "4.42.3",
471
+ "use_image_newline_parameter": true,
472
+ "vision_config": {
473
+ "hidden_size": 1024,
474
+ "image_size": 336,
475
+ "intermediate_size": 4096,
476
+ "model_type": "clip_vision_model",
477
+ "num_attention_heads": 16,
478
+ "num_hidden_layers": 24,
479
+ "patch_size": 14,
480
+ "projection_dim": 768,
481
+ "vocab_size": 32000
482
+ },
483
+ "vision_feature_layer": -2,
484
+ "vision_feature_select_strategy": "default",
485
+ "vocab_size": 32064
486
+ }
487
+
488
+
489
+ 07/03/2024 15:14:56 - INFO - transformers.image_processing_base - Image processor saved in saves/LLaVA1.5-7B-Chat/lora/train_2024-07-03-14-56-50/preprocessor_config.json
490
+
491
+ 07/03/2024 15:14:56 - WARNING - llamafactory.extras.ploting - No metric eval_loss to plot.
492
+
493
+ 07/03/2024 15:14:56 - INFO - transformers.modelcard - Dropping the following result as it does not have all the necessary fields:
494
+ {'task': {'name': 'Causal Language Modeling', 'type': 'text-generation'}}
495
+
special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "<unk>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "unk_token": {
24
+ "content": "<unk>",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ }
30
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
3
+ size 499723
tokenizer_config.json ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "add_prefix_space": true,
5
+ "added_tokens_decoder": {
6
+ "0": {
7
+ "content": "<unk>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false,
12
+ "special": true
13
+ },
14
+ "1": {
15
+ "content": "<s>",
16
+ "lstrip": false,
17
+ "normalized": false,
18
+ "rstrip": false,
19
+ "single_word": false,
20
+ "special": true
21
+ },
22
+ "2": {
23
+ "content": "</s>",
24
+ "lstrip": false,
25
+ "normalized": false,
26
+ "rstrip": false,
27
+ "single_word": false,
28
+ "special": true
29
+ },
30
+ "32000": {
31
+ "content": "<image>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false,
36
+ "special": true
37
+ }
38
+ },
39
+ "bos_token": "<s>",
40
+ "chat_template": "{% set system_message = 'A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user\\'s questions.' %}{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% if system_message is defined %}{{ system_message }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ 'USER: ' + content + ' ASSISTANT:' }}{% elif message['role'] == 'assistant' %}{{ content + '</s>' }}{% endif %}{% endfor %}",
41
+ "clean_up_tokenization_spaces": false,
42
+ "eos_token": "</s>",
43
+ "legacy": false,
44
+ "model_max_length": 4096,
45
+ "pad_token": "<unk>",
46
+ "padding_side": "right",
47
+ "processor_class": "LlavaNextProcessor",
48
+ "sp_model_kwargs": {},
49
+ "spaces_between_special_tokens": false,
50
+ "split_special_tokens": false,
51
+ "tokenizer_class": "LlamaTokenizer",
52
+ "unk_token": "<unk>",
53
+ "use_default_system_prompt": false
54
+ }