czczup committed on
Commit
a65cafe
1 Parent(s): 2812c53

Update README.md

Browse files
Files changed (3) hide show
  1. README.md +5 -9
  2. config.json +2 -59
  3. vocab.json +0 -0
README.md CHANGED
@@ -2,10 +2,8 @@
2
  license: mit
3
  pipeline_tag: image-text-to-text
4
  library_name: transformers
5
- base_model:
6
- - OpenGVLab/InternViT-6B-448px-V2_5
7
- - Qwen/Qwen2.5-32B-Instruct
8
- base_model_relation: merge
9
  language:
10
  - multilingual
11
  tags:
@@ -82,8 +80,6 @@ If `ImportError` occurs while executing this case, please install the required d
82
 
83
  When dealing with multiple images, you can put them all in one list. Keep in mind that multiple images will lead to a higher number of input tokens, and as a result, the size of the context window typically needs to be increased.
84
 
85
- question = 'Describe this video in detail.'
86
-
87
  ```python
88
  from lmdeploy import pipeline, TurbomindEngineConfig
89
  from lmdeploy.vl import load_image
@@ -147,7 +143,7 @@ print(sess.response.text)
147
  LMDeploy's `api_server` enables models to be easily packed into services with a single command. The provided RESTful APIs are compatible with OpenAI's interfaces. Below is an example of service startup:
148
 
149
  ```shell
150
- lmdeploy serve api_server OpenGVLab/InternVL2_5-38B-AWQ --backend turbomind --server-port 23333 --tp 2
151
  ```
152
 
153
  To use the OpenAI-style interface, you need to install OpenAI:
@@ -186,7 +182,7 @@ print(response)
186
 
187
  ## License
188
 
189
- This project is released under the MIT License. This project uses the pre-trained Qwen2.5-72B-Instruct as a component, which is licensed under the Qwen License.
190
 
191
  ## Citation
192
 
@@ -218,4 +214,4 @@ If you find this project useful in your research, please consider citing:
218
  pages={24185--24198},
219
  year={2024}
220
  }
221
- ```
 
2
  license: mit
3
  pipeline_tag: image-text-to-text
4
  library_name: transformers
5
+ base_model: OpenGVLab/InternVL2_5-38B
6
+ base_model_relation: quantized
 
 
7
  language:
8
  - multilingual
9
  tags:
 
80
 
81
  When dealing with multiple images, you can put them all in one list. Keep in mind that multiple images will lead to a higher number of input tokens, and as a result, the size of the context window typically needs to be increased.
82
 
 
 
83
  ```python
84
  from lmdeploy import pipeline, TurbomindEngineConfig
85
  from lmdeploy.vl import load_image
 
143
  LMDeploy's `api_server` enables models to be easily packed into services with a single command. The provided RESTful APIs are compatible with OpenAI's interfaces. Below is an example of service startup:
144
 
145
  ```shell
146
+ lmdeploy serve api_server OpenGVLab/InternVL2_5-38B-AWQ --server-port 23333 --tp 2
147
  ```
148
 
149
  To use the OpenAI-style interface, you need to install OpenAI:
 
182
 
183
  ## License
184
 
185
+ This project is released under the MIT License. This project uses the pre-trained Qwen2.5-32B-Instruct as a component, which is licensed under the Apache License 2.0.
186
 
187
  ## Citation
188
 
 
214
  pages={24185--24198},
215
  year={2024}
216
  }
217
+ ```
config.json CHANGED
@@ -1,6 +1,5 @@
1
  {
2
- "_commit_hash": "ac62eaa874a0c1c901def518452e315a369ac034",
3
- "_name_or_path": "/models/141/huggingface_hub/hub/models--OpenGVLab--InternVL2_5-38B/snapshots/ac62eaa874a0c1c901def518452e315a369ac034/",
4
  "architectures": [
5
  "InternVLChatModel"
6
  ],
@@ -19,8 +18,8 @@
19
  "architectures": [
20
  "Qwen2ForCausalLM"
21
  ],
22
- "attention_dropout": 0.0,
23
  "attn_implementation": "eager",
 
24
  "bad_words_ids": null,
25
  "begin_suppress_tokens": null,
26
  "bos_token_id": 151643,
@@ -111,92 +110,36 @@
111
  "select_layer": -1,
112
  "template": "internvl2_5",
113
  "torch_dtype": "float16",
114
- "transformers_version": null,
115
  "use_backbone_lora": 0,
116
  "use_llm_lora": 0,
117
  "use_thumbnail": true,
118
  "vision_config": {
119
- "_attn_implementation_autoset": true,
120
- "_name_or_path": "",
121
- "add_cross_attention": false,
122
  "architectures": [
123
  "InternVisionModel"
124
  ],
125
  "attention_dropout": 0.0,
126
- "bad_words_ids": null,
127
- "begin_suppress_tokens": null,
128
- "bos_token_id": null,
129
- "chunk_size_feed_forward": 0,
130
- "cross_attention_hidden_size": null,
131
- "decoder_start_token_id": null,
132
- "diversity_penalty": 0.0,
133
- "do_sample": false,
134
  "drop_path_rate": 0.0,
135
  "dropout": 0.0,
136
- "early_stopping": false,
137
- "encoder_no_repeat_ngram_size": 0,
138
- "eos_token_id": null,
139
- "exponential_decay_length_penalty": null,
140
- "finetuning_task": null,
141
- "forced_bos_token_id": null,
142
- "forced_eos_token_id": null,
143
  "hidden_act": "gelu",
144
  "hidden_size": 3200,
145
- "id2label": {
146
- "0": "LABEL_0",
147
- "1": "LABEL_1"
148
- },
149
  "image_size": 448,
150
  "initializer_factor": 0.1,
151
  "initializer_range": 1e-10,
152
  "intermediate_size": 12800,
153
- "is_decoder": false,
154
- "is_encoder_decoder": false,
155
- "label2id": {
156
- "LABEL_0": 0,
157
- "LABEL_1": 1
158
- },
159
  "layer_norm_eps": 1e-06,
160
- "length_penalty": 1.0,
161
- "max_length": 20,
162
- "min_length": 0,
163
  "model_type": "intern_vit_6b",
164
- "no_repeat_ngram_size": 0,
165
  "norm_type": "rms_norm",
166
  "num_attention_heads": 25,
167
- "num_beam_groups": 1,
168
- "num_beams": 1,
169
  "num_channels": 3,
170
  "num_hidden_layers": 45,
171
- "num_return_sequences": 1,
172
  "output_attentions": false,
173
  "output_hidden_states": false,
174
- "output_scores": false,
175
- "pad_token_id": null,
176
  "patch_size": 14,
177
- "prefix": null,
178
- "problem_type": null,
179
- "pruned_heads": {},
180
  "qk_normalization": true,
181
  "qkv_bias": false,
182
- "remove_invalid_values": false,
183
- "repetition_penalty": 1.0,
184
  "return_dict": true,
185
- "return_dict_in_generate": false,
186
- "sep_token_id": null,
187
- "suppress_tokens": null,
188
- "task_specific_params": null,
189
- "temperature": 1.0,
190
- "tf_legacy_loss": false,
191
- "tie_encoder_decoder": false,
192
- "tie_word_embeddings": true,
193
- "tokenizer_class": null,
194
- "top_k": 50,
195
- "top_p": 1.0,
196
  "torch_dtype": "bfloat16",
197
- "torchscript": false,
198
  "transformers_version": "4.47.0",
199
- "typical_p": 1.0,
200
  "use_bfloat16": true,
201
  "use_flash_attn": false
202
  }
 
1
  {
2
+ "_commit_hash": null,
 
3
  "architectures": [
4
  "InternVLChatModel"
5
  ],
 
18
  "architectures": [
19
  "Qwen2ForCausalLM"
20
  ],
 
21
  "attn_implementation": "eager",
22
+ "attention_dropout": 0.0,
23
  "bad_words_ids": null,
24
  "begin_suppress_tokens": null,
25
  "bos_token_id": 151643,
 
110
  "select_layer": -1,
111
  "template": "internvl2_5",
112
  "torch_dtype": "float16",
 
113
  "use_backbone_lora": 0,
114
  "use_llm_lora": 0,
115
  "use_thumbnail": true,
116
  "vision_config": {
 
 
 
117
  "architectures": [
118
  "InternVisionModel"
119
  ],
120
  "attention_dropout": 0.0,
 
 
 
 
 
 
 
 
121
  "drop_path_rate": 0.0,
122
  "dropout": 0.0,
 
 
 
 
 
 
 
123
  "hidden_act": "gelu",
124
  "hidden_size": 3200,
 
 
 
 
125
  "image_size": 448,
126
  "initializer_factor": 0.1,
127
  "initializer_range": 1e-10,
128
  "intermediate_size": 12800,
 
 
 
 
 
 
129
  "layer_norm_eps": 1e-06,
 
 
 
130
  "model_type": "intern_vit_6b",
 
131
  "norm_type": "rms_norm",
132
  "num_attention_heads": 25,
 
 
133
  "num_channels": 3,
134
  "num_hidden_layers": 45,
 
135
  "output_attentions": false,
136
  "output_hidden_states": false,
 
 
137
  "patch_size": 14,
 
 
 
138
  "qk_normalization": true,
139
  "qkv_bias": false,
 
 
140
  "return_dict": true,
 
 
 
 
 
 
 
 
 
 
 
141
  "torch_dtype": "bfloat16",
 
142
  "transformers_version": "4.47.0",
 
143
  "use_bfloat16": true,
144
  "use_flash_attn": false
145
  }
vocab.json CHANGED
The diff for this file is too large to render. See raw diff