royleibov committed
Commit a571cde
1 Parent(s): a99e639

Add ZipNN support

Files changed (1):
  README.md +49 -7

README.md CHANGED
@@ -7,6 +7,42 @@ tags:
  - multimodal
  library_name: transformers
  ---
+ # Disclaimer and Requirements
+
+ This model is a clone of [**Qwen/Qwen2-VL-7B-Instruct**](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct) compressed using ZipNN. Compressed losslessly to 67% of its original size, ZipNN saved ~6GB in storage and potentially ~2PB in data transfer **monthly**.
+
+ ### Requirement
+
+ To use this model, the ZipNN package must be installed:
+ ```bash
+ pip install zipnn
+ ```
+
+ ### Use This Model
+ ```python
+ # Load model directly
+ from transformers import AutoProcessor, Qwen2VLForConditionalGeneration
+ from zipnn import zipnn_hf
+
+ zipnn_hf()
+
+ processor = AutoProcessor.from_pretrained("royleibov/Qwen2-VL-7B-Instruct-ZipNN-Compressed")
+ model = Qwen2VLForConditionalGeneration.from_pretrained("royleibov/Qwen2-VL-7B-Instruct-ZipNN-Compressed")
+ ```
+ ### ZipNN
+ ZipNN also allows you to seamlessly save local disk space in your cache after the model is downloaded.
+
+ To compress the cached model, simply run:
+ ```bash
+ python zipnn_compress_path.py safetensors --model royleibov/Qwen2-VL-7B-Instruct-ZipNN-Compressed --hf_cache
+ ```
+
+ The model will be decompressed automatically and safely as long as `zipnn_hf()` is added at the top of the file, as in the [example above](#use-this-model).
+
+ To decompress manually, simply run:
+ ```bash
+ python zipnn_decompress_path.py --model royleibov/Qwen2-VL-7B-Instruct-ZipNN-Compressed --hf_cache
+ ```
 
  # Qwen2-VL-7B-Instruct
 
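The storage figures quoted in the new disclaimer above are internally consistent: a lossless reduction to 67% of the original size that saves ~6GB implies a checkpoint of roughly 18GB before compression. Below is a minimal sketch of that arithmetic; it uses only the two numbers quoted in the diff, not sizes measured from the repository.

```python
# Arithmetic behind the storage figures quoted in the ZipNN disclaimer above.
# Only the two numbers stated in the diff are used: compression to 67% of the
# original size, and ~6 GB saved in storage.
compressed_fraction = 0.67   # "compressed losslessly to 67% of its original size"
savings_gb = 6.0             # "~6GB in storage"

# savings = (1 - compressed_fraction) * original
original_gb = savings_gb / (1.0 - compressed_fraction)
compressed_gb = compressed_fraction * original_gb

print(f"implied original checkpoint size: ~{original_gb:.0f} GB")    # ~18 GB
print(f"implied compressed size:          ~{compressed_gb:.0f} GB")  # ~12 GB
```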
 
 
@@ -103,27 +139,30 @@ Here we show a code snippet to show you how to use the chat model with `transfor
  ```python
  from transformers import Qwen2VLForConditionalGeneration, AutoTokenizer, AutoProcessor
  from qwen_vl_utils import process_vision_info
+ from zipnn import zipnn_hf
+
+ zipnn_hf()
 
  # default: Load the model on the available device(s)
  model = Qwen2VLForConditionalGeneration.from_pretrained(
-     "Qwen/Qwen2-VL-7B-Instruct", torch_dtype="auto", device_map="auto"
+     "royleibov/Qwen2-VL-7B-Instruct-ZipNN-Compressed", torch_dtype="auto", device_map="auto"
  )
 
  # We recommend enabling flash_attention_2 for better acceleration and memory saving, especially in multi-image and video scenarios.
  # model = Qwen2VLForConditionalGeneration.from_pretrained(
- #     "Qwen/Qwen2-VL-7B-Instruct",
+ #     "royleibov/Qwen2-VL-7B-Instruct-ZipNN-Compressed",
  #     torch_dtype=torch.bfloat16,
  #     attn_implementation="flash_attention_2",
  #     device_map="auto",
  # )
 
  # default processor
- processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct")
+ processor = AutoProcessor.from_pretrained("royleibov/Qwen2-VL-7B-Instruct-ZipNN-Compressed")
 
  # The default range for the number of visual tokens per image in the model is 4-16384. You can set min_pixels and max_pixels according to your needs, such as a token count range of 256-1280, to balance speed and memory usage.
  # min_pixels = 256*28*28
  # max_pixels = 1280*28*28
- # processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct", min_pixels=min_pixels, max_pixels=max_pixels)
+ # processor = AutoProcessor.from_pretrained("royleibov/Qwen2-VL-7B-Instruct-ZipNN-Compressed", min_pixels=min_pixels, max_pixels=max_pixels)
 
  messages = [
      {
 
@@ -172,12 +211,15 @@ import torch
  from torchvision import io
  from typing import Dict
  from transformers import Qwen2VLForConditionalGeneration, AutoTokenizer, AutoProcessor
+ from zipnn import zipnn_hf
+
+ zipnn_hf()
 
  # Load the model in half-precision on the available device(s)
  model = Qwen2VLForConditionalGeneration.from_pretrained(
-     "Qwen/Qwen2-VL-7B-Instruct", torch_dtype="auto", device_map="auto"
+     "royleibov/Qwen2-VL-7B-Instruct-ZipNN-Compressed", torch_dtype="auto", device_map="auto"
  )
- processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct")
+ processor = AutoProcessor.from_pretrained("royleibov/Qwen2-VL-7B-Instruct-ZipNN-Compressed")
 
  # Image
  url = "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg"
 
@@ -418,7 +460,7 @@ The model supports a wide range of resolution inputs. By default, it uses the na
  min_pixels = 256 * 28 * 28
  max_pixels = 1280 * 28 * 28
  processor = AutoProcessor.from_pretrained(
-     "Qwen/Qwen2-VL-7B-Instruct", min_pixels=min_pixels, max_pixels=max_pixels
+     "royleibov/Qwen2-VL-7B-Instruct-ZipNN-Compressed", min_pixels=min_pixels, max_pixels=max_pixels
  )
  ```
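The `min_pixels` / `max_pixels` values set in the last hunk map directly onto the visual-token budget described in the README's processor comments (a range of 256-1280 tokens per image). A short sketch of that relationship follows, assuming each visual token corresponds to a 28*28-pixel patch, as the README's own formula implies.

```python
# How the min_pixels / max_pixels values in the last hunk relate to the
# 256-1280 visual-token range mentioned in the README's processor comments.
PIXELS_PER_TOKEN = 28 * 28        # one visual token per 28x28-pixel patch (inferred from the README's formula)

min_pixels = 256 * 28 * 28        # values from the diff above
max_pixels = 1280 * 28 * 28

print(min_pixels, max_pixels)                  # 200704 1003520
print(min_pixels // PIXELS_PER_TOKEN,          # 256  -> minimum visual tokens per image
      max_pixels // PIXELS_PER_TOKEN)          # 1280 -> maximum visual tokens per image
```

Passing these bounds to `AutoProcessor.from_pretrained(...)`, as the hunk does, caps how many visual tokens each image contributes, trading accuracy for speed and memory.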