ben81828 committed on
Commit
4d12da2
·
verified ·
1 Parent(s): 100fe00

Model save

Browse files
README.md ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: peft
3
+ license: apache-2.0
4
+ base_model: AdaptLLM/biomed-Qwen2-VL-2B-Instruct
5
+ tags:
6
+ - llama-factory
7
+ - generated_from_trainer
8
+ model-index:
9
+ - name: qwenvl-2B-cadica-direction-scale4
10
+ results: []
11
+ ---
12
+
13
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
14
+ should probably proofread and complete it, then remove this comment. -->
15
+
16
+ # qwenvl-2B-cadica-direction-scale4
17
+
18
+ This model is a fine-tuned version of [AdaptLLM/biomed-Qwen2-VL-2B-Instruct](https://huggingface.co/AdaptLLM/biomed-Qwen2-VL-2B-Instruct) on an unknown dataset.
19
+ It achieves the following results on the evaluation set:
20
+ - Loss: 0.0022
21
+ - Num Input Tokens Seen: 11980800
22
+
23
+ ## Model description
24
+
25
+ More information needed
26
+
27
+ ## Intended uses & limitations
28
+
29
+ More information needed
30
+
31
+ ## Training and evaluation data
32
+
33
+ More information needed
34
+
35
+ ## Training procedure
36
+
37
+ ### Training hyperparameters
38
+
39
+ The following hyperparameters were used during training:
40
+ - learning_rate: 0.0001
41
+ - train_batch_size: 1
42
+ - eval_batch_size: 1
43
+ - seed: 42
44
+ - distributed_type: multi-GPU
45
+ - num_devices: 4
46
+ - gradient_accumulation_steps: 6
47
+ - total_train_batch_size: 24
48
+ - total_eval_batch_size: 4
49
+ - optimizer: Use adamw_torch with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
50
+ - lr_scheduler_type: cosine
51
+ - lr_scheduler_warmup_ratio: 0.05
52
+ - training_steps: 1200
53
+
54
+ ### Training results
55
+
56
+ | Training Loss | Epoch | Step | Validation Loss | Input Tokens Seen |
57
+ |:-------------:|:------:|:----:|:---------------:|:-----------------:|
58
+ | 0.3441 | 0.0258 | 50 | 0.3383 | 499200 |
59
+ | 0.2274 | 0.0515 | 100 | 0.1866 | 998400 |
60
+ | 0.0667 | 0.0773 | 150 | 0.0967 | 1497600 |
61
+ | 0.0459 | 0.1030 | 200 | 0.0996 | 1996800 |
62
+ | 0.0805 | 0.1288 | 250 | 0.0559 | 2496000 |
63
+ | 0.0381 | 0.1545 | 300 | 0.0309 | 2995200 |
64
+ | 0.1761 | 0.1803 | 350 | 0.0439 | 3494400 |
65
+ | 0.0146 | 0.2060 | 400 | 0.0244 | 3993600 |
66
+ | 0.0157 | 0.2318 | 450 | 0.0067 | 4492800 |
67
+ | 0.0122 | 0.2575 | 500 | 0.0080 | 4992000 |
68
+ | 0.0339 | 0.2833 | 550 | 0.0034 | 5491200 |
69
+ | 0.0217 | 0.3090 | 600 | 0.0133 | 5990400 |
70
+ | 0.0327 | 0.3348 | 650 | 0.0210 | 6489600 |
71
+ | 0.0267 | 0.3605 | 700 | 0.0053 | 6988800 |
72
+ | 0.014 | 0.3863 | 750 | 0.0053 | 7488000 |
73
+ | 0.0065 | 0.4121 | 800 | 0.0068 | 7987200 |
74
+ | 0.0306 | 0.4378 | 850 | 0.0072 | 8486400 |
75
+ | 0.0063 | 0.4636 | 900 | 0.0107 | 8985600 |
76
+ | 0.0415 | 0.4893 | 950 | 0.0072 | 9484800 |
77
+ | 0.0547 | 0.5151 | 1000 | 0.0007 | 9984000 |
78
+ | 0.0007 | 0.5408 | 1050 | 0.0568 | 10483200 |
79
+ | 0.0056 | 0.5666 | 1100 | 0.0004 | 10982400 |
80
+ | 0.0127 | 0.5923 | 1150 | 0.0000 | 11481600 |
81
+ | 0.0038 | 0.6181 | 1200 | 0.0022 | 11980800 |
82
+
83
+
84
+ ### Framework versions
85
+
86
+ - PEFT 0.12.0
87
+ - Transformers 4.47.0.dev0
88
+ - Pytorch 2.5.1+cu121
89
+ - Datasets 3.1.0
90
+ - Tokenizers 0.20.3
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c8ee2634589442880539cea4215aeae5e195f22dea93b6441497472981b36691
3
  size 29034840
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:050dc1f1854d68a42fb308d9dd143ddc3d6fc0d86870ea1eac6bf90317df973a
3
  size 29034840
chat_template.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}"
3
+ }
preprocessor_config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_convert_rgb": true,
3
+ "do_normalize": true,
4
+ "do_rescale": true,
5
+ "do_resize": true,
6
+ "image_mean": [
7
+ 0.48145466,
8
+ 0.4578275,
9
+ 0.40821073
10
+ ],
11
+ "image_processor_type": "Qwen2VLImageProcessor",
12
+ "image_std": [
13
+ 0.26862954,
14
+ 0.26130258,
15
+ 0.27577711
16
+ ],
17
+ "max_pixels": 12845056,
18
+ "merge_size": 2,
19
+ "min_pixels": 3136,
20
+ "patch_size": 14,
21
+ "processor_class": "Qwen2VLProcessor",
22
+ "resample": 3,
23
+ "rescale_factor": 0.00392156862745098,
24
+ "size": {
25
+ "max_pixels": 12845056,
26
+ "min_pixels": 3136
27
+ },
28
+ "temporal_patch_size": 2
29
+ }