OpenGVLab
/

InternVL2-2B

@@ -330,7 +330,7 @@ from lmdeploy import pipeline, TurbomindEngineConfig, ChatTemplateConfig
 from lmdeploy.vl import load_image
 model = 'OpenGVLab/InternVL2-2B'
-system_prompt = '我是书生·万象，英文名是InternVL，是由上海人工智能实验室及多家合作单位联合开发的多模态大语言模型。人工智能实验室致力于原始技术创新，开源开放，共享共创，推动科技进步和产业发展。'
 image = load_image('https://raw.githubusercontent.com/open-mmlab/mmdeploy/main/tests/data/tiger.jpeg')
 chat_template_config = ChatTemplateConfig('internvl-internlm2')
 chat_template_config.meta_instruction = system_prompt
@@ -346,13 +346,15 @@ If `ImportError` occurs while executing this case, please install the required d
 When dealing with multiple images, you can put them all in one list. Keep in mind that multiple images will lead to a higher number of input tokens, and as a result, the size of the context window typically needs to be increased.
 ```python
 from lmdeploy import pipeline, TurbomindEngineConfig, ChatTemplateConfig
 from lmdeploy.vl import load_image
 from lmdeploy.vl.constants import IMAGE_TOKEN
 model = 'OpenGVLab/InternVL2-2B'
-system_prompt = '我是书生·万象，英文名是InternVL，是由上海人工智能实验室及多家合作单位联合开发的多模态大语言模型。人工智能实验室致力于原始技术创新，开源开放，共享共创，推动科技进步和产业发展。'
 chat_template_config = ChatTemplateConfig('internvl-internlm2')
 chat_template_config.meta_instruction = system_prompt
 pipe = pipeline(model, chat_template_config=chat_template_config,
@@ -378,7 +380,7 @@ from lmdeploy import pipeline, TurbomindEngineConfig, ChatTemplateConfig
 from lmdeploy.vl import load_image
 model = 'OpenGVLab/InternVL2-2B'
-system_prompt = '我是书生·万象，英文名是InternVL，是由上海人工智能实验室及多家合作单位联合开发的多模态大语言模型。人工智能实验室致力于原始技术创新，开源开放，共享共创，推动科技进步和产业发展。'
 chat_template_config = ChatTemplateConfig('internvl-internlm2')
 chat_template_config.meta_instruction = system_prompt
 pipe = pipeline(model, chat_template_config=chat_template_config,
@@ -402,7 +404,7 @@ from lmdeploy import pipeline, TurbomindEngineConfig, ChatTemplateConfig, Genera
 from lmdeploy.vl import load_image
 model = 'OpenGVLab/InternVL2-2B'
-system_prompt = '我是书生·万象，英文名是InternVL，是由上海人工智能实验室及多家合作单位联合开发的多模态大语言模型。人工智能实验室致力于原始技术创新，开源开放，共享共创，推动科技进步和产业发展。'
 chat_template_config = ChatTemplateConfig('internvl-internlm2')
 chat_template_config.meta_instruction = system_prompt
 pipe = pipeline(model, chat_template_config=chat_template_config,
@@ -418,29 +420,55 @@ print(sess.response.text)
 #### Service
-For lmdeploy v0.5.0, please configure the chat template config first. Create the following JSON file `chat_template.json`.
 ```json
 {
-    "model_name":"internlm2",
-    "meta_instruction":"我是书生·万象，英文名是InternVL，是由上海人工智能实验室及多家合作单位联合开发的多模态大语言模型。人工智能实验室致力于原始技术创新，开源开放，共享共创，推动科技进步和产业发展。",
     "stop_words":["<|im_start|>", "<|im_end|>"]
 }
 ```
-LMDeploy's `api_server` enables models to be easily packed into services with a single command. The provided RESTful APIs are compatible with OpenAI's interfaces. Below are an example of service startup:
 ```shell
-lmdeploy serve api_server OpenGVLab/InternVL2-2B --backend turbomind --chat-template chat_template.json
 ```
-The default port of `api_server` is `23333`. After the server is launched, you can communicate with server on terminal through `api_client`:
 ```shell
-lmdeploy serve api_client http://0.0.0.0:23333
 ```
-You can overview and try out `api_server` APIs online by swagger UI at `http://0.0.0.0:23333`, or you can also read the API specification from [here](https://github.com/InternLM/lmdeploy/blob/main/docs/en/serving/restful_api.md).
 ## License
@@ -550,7 +578,7 @@ from lmdeploy import pipeline, TurbomindEngineConfig, ChatTemplateConfig
 from lmdeploy.vl import load_image
 model = 'OpenGVLab/InternVL2-2B'
-system_prompt = '我是书生·万象，英文名是InternVL，是由上海人工智能实验室及多家合作单位联合开发的多模态大语言模型。人工智能实验室致力于原始技术创新，开源开放，共享共创，推动科技进步和产业发展。'
 image = load_image('https://raw.githubusercontent.com/open-mmlab/mmdeploy/main/tests/data/tiger.jpeg')
 chat_template_config = ChatTemplateConfig('internvl-internlm2')
 chat_template_config.meta_instruction = system_prompt
@@ -572,7 +600,7 @@ from lmdeploy.vl import load_image
 from lmdeploy.vl.constants import IMAGE_TOKEN
 model = 'OpenGVLab/InternVL2-2B'
-system_prompt = '我是书生·万象，英文名是InternVL，是由上海人工智能实验室及多家合作单位联合开发的多模态大语言模型。人工智能实验室致力于原始技术创新，开源开放，共享共创，推动科技进步和产业发展。'
 chat_template_config = ChatTemplateConfig('internvl-internlm2')
 chat_template_config.meta_instruction = system_prompt
 pipe = pipeline(model, chat_template_config=chat_template_config,
@@ -597,7 +625,7 @@ from lmdeploy import pipeline, TurbomindEngineConfig, ChatTemplateConfig
 from lmdeploy.vl import load_image
 model = 'OpenGVLab/InternVL2-2B'
-system_prompt = '我是书生·万象，英文名是InternVL，是由上海人工智能实验室及多家合作单位联合开发的多模态大语言模型。人工智能实验室致力于原始技术创新，开源开放，共享共创，推动科技进步和产业发展。'
 chat_template_config = ChatTemplateConfig('internvl-internlm2')
 chat_template_config.meta_instruction = system_prompt
 pipe = pipeline(model, chat_template_config=chat_template_config,
@@ -621,7 +649,7 @@ from lmdeploy import pipeline, TurbomindEngineConfig, ChatTemplateConfig, Genera
 from lmdeploy.vl import load_image
 model = 'OpenGVLab/InternVL2-2B'
-system_prompt = '我是书生·万象，英文名是InternVL，是由上海人工智能实验室及多家合作单位联合开发的多模态大语言模型。人工智能实验室致力于原始技术创新，开源开放，共享共创，推动科技进步和产业发展。'
 chat_template_config = ChatTemplateConfig('internvl-internlm2')
 chat_template_config.meta_instruction = system_prompt
 pipe = pipeline(model, chat_template_config=chat_template_config,
@@ -637,12 +665,12 @@ print(sess.response.text)
 #### API部署
-对于 lmdeploy v0.5.0，请先配置聊天模板配置文件。创建如下的 JSON 文件 `chat_template.json`。
 ```json
 {
-    "model_name":"internlm2",
-    "meta_instruction":"我是书生·万象，英文名是InternVL，是由上海人工智能实验室及多家合作单位联合开发的多模态大语言模型。人工智能实验室致力于原始技术创新，开源开放，共享共创，推动科技进步和产业发展。",
     "stop_words":["<|im_start|>", "<|im_end|>"]
 }
 ```
@@ -650,16 +678,42 @@ print(sess.response.text)
 LMDeploy 的 `api_server` 使模型能够通过一个命令轻松打包成服务。提供的 RESTful API 与 OpenAI 的接口兼容。以下是服务启动的示例：
 ```shell
-lmdeploy serve api_server OpenGVLab/InternVL2-2B --backend turbomind --chat-template chat_template.json
 ```
-`api_server` 的默认端口是 `23333`。服务器启动后，你可以通过 `api_client` 在终端与服务器通信：
 ```shell
-lmdeploy serve api_client http://0.0.0.0:23333
 ```
-你可以通过 `http://0.0.0.0:23333` 的 swagger UI 在线查看和试用 `api_server` 的 API，也可以从 [这里](https://github.com/InternLM/lmdeploy/blob/main/docs/en/serving/restful_api.md) 阅读 API 规范。
 ## 开源许可证

 from lmdeploy.vl import load_image
 model = 'OpenGVLab/InternVL2-2B'
+system_prompt = '我是书生·万象，英文名是InternVL，是由上海人工智能实验室及多家合作单位联合开发的多模态大语言模型。'
 image = load_image('https://raw.githubusercontent.com/open-mmlab/mmdeploy/main/tests/data/tiger.jpeg')
 chat_template_config = ChatTemplateConfig('internvl-internlm2')
 chat_template_config.meta_instruction = system_prompt
 When dealing with multiple images, you can put them all in one list. Keep in mind that multiple images will lead to a higher number of input tokens, and as a result, the size of the context window typically needs to be increased.
+> Warning: Due to the scarcity of multi-image conversation data, the performance on multi-image tasks may be unstable, and it may require multiple attempts to achieve satisfactory results.
 ```python
 from lmdeploy import pipeline, TurbomindEngineConfig, ChatTemplateConfig
 from lmdeploy.vl import load_image
 from lmdeploy.vl.constants import IMAGE_TOKEN
 model = 'OpenGVLab/InternVL2-2B'
+system_prompt = '我是书生·万象，英文名是InternVL，是由上海人工智能实验室及多家合作单位联合开发的多模态大语言模型。'
 chat_template_config = ChatTemplateConfig('internvl-internlm2')
 chat_template_config.meta_instruction = system_prompt
 pipe = pipeline(model, chat_template_config=chat_template_config,
 from lmdeploy.vl import load_image
 model = 'OpenGVLab/InternVL2-2B'
+system_prompt = '我是书生·万象，英文名是InternVL，是由上海人工智能实验室及多家合作单位联合开发的多模态大语言模型。'
 chat_template_config = ChatTemplateConfig('internvl-internlm2')
 chat_template_config.meta_instruction = system_prompt
 pipe = pipeline(model, chat_template_config=chat_template_config,
 from lmdeploy.vl import load_image
 model = 'OpenGVLab/InternVL2-2B'
+system_prompt = '我是书生·万象，英文名是InternVL，是由上海人工智能实验室及多家合作单位联合开发的多模态大语言模型。'
 chat_template_config = ChatTemplateConfig('internvl-internlm2')
 chat_template_config.meta_instruction = system_prompt
 pipe = pipeline(model, chat_template_config=chat_template_config,
 #### Service
+To deploy InternVL2, please set up the chat template configuration first. Create the following JSON file, `chat_template.json`.
 ```json
 {
+    "model_name":"internvl-internlm2",
+    "meta_instruction":"我是书生·万象，英文名是InternVL，是由上海人工智能实验室及多家合作单位联合开发的多模态大语言模型。",
     "stop_words":["<|im_start|>", "<|im_end|>"]
 }
 ```
+LMDeploy's `api_server` enables models to be easily packed into services with a single command. The provided RESTful APIs are compatible with OpenAI's interfaces. Below is an example of service startup:
 ```shell
+lmdeploy serve api_server OpenGVLab/InternVL2-2B --model-name InternVL2-2B --backend turbomind --server-port 23333 --chat-template chat_template.json
 ```
+To use the OpenAI-style interface, you need to install the `openai` Python package:
 ```shell
+pip install openai
 ```
+Then, use the code below to make the API call:
+```python
+from openai import OpenAI
+client = OpenAI(api_key='YOUR_API_KEY', base_url='http://0.0.0.0:23333/v1')
+model_name = client.models.list().data[0].id
+response = client.chat.completions.create(
+    model="InternVL2-2B",
+    messages=[{
+        'role':
+        'user',
+        'content': [{
+            'type': 'text',
+            'text': 'describe this image',
+        }, {
+            'type': 'image_url',
+            'image_url': {
+                'url':
+                'https://modelscope.oss-cn-beijing.aliyuncs.com/resource/tiger.jpeg',
+            },
+        }],
+    }],
+    temperature=0.8,
+    top_p=0.8)
+print(response)
+```
 ## License
 from lmdeploy.vl import load_image
 model = 'OpenGVLab/InternVL2-2B'
+system_prompt = '我是书生·万象，英文名是InternVL，是由上海人工智能实验室及多家合作单位联合开发的多模态大语言模型。'
 image = load_image('https://raw.githubusercontent.com/open-mmlab/mmdeploy/main/tests/data/tiger.jpeg')
 chat_template_config = ChatTemplateConfig('internvl-internlm2')
 chat_template_config.meta_instruction = system_prompt
 from lmdeploy.vl.constants import IMAGE_TOKEN
 model = 'OpenGVLab/InternVL2-2B'
+system_prompt = '我是书生·万象，英文名是InternVL，是由上海人工智能实验室及多家合作单位联合开发的多模态大语言模型。'
 chat_template_config = ChatTemplateConfig('internvl-internlm2')
 chat_template_config.meta_instruction = system_prompt
 pipe = pipeline(model, chat_template_config=chat_template_config,
 from lmdeploy.vl import load_image
 model = 'OpenGVLab/InternVL2-2B'
+system_prompt = '我是书生·万象，英文名是InternVL，是由上海人工智能实验室及多家合作单位联合开发的多模态大语言模型。'
 chat_template_config = ChatTemplateConfig('internvl-internlm2')
 chat_template_config.meta_instruction = system_prompt
 pipe = pipeline(model, chat_template_config=chat_template_config,
 from lmdeploy.vl import load_image
 model = 'OpenGVLab/InternVL2-2B'
+system_prompt = '我是书生·万象，英文名是InternVL，是由上海人工智能实验室及多家合作单位联合开发的多模态大语言模型。'
 chat_template_config = ChatTemplateConfig('internvl-internlm2')
 chat_template_config.meta_instruction = system_prompt
 pipe = pipeline(model, chat_template_config=chat_template_config,
 #### API部署
+为了部署 InternVL2，请先配置聊天模板配置文件。创建如下的 JSON 文件 `chat_template.json`。
 ```json
 {
+    "model_name":"internvl-internlm2",
+    "meta_instruction":"我是书生·万象，英文名是InternVL，是由上海人工智能实验室及多家合作单位联合开发的多模态大语言模型。",
     "stop_words":["<|im_start|>", "<|im_end|>"]
 }
 ```
 LMDeploy 的 `api_server` 使模型能够通过一个命令轻松打包成服务。提供的 RESTful API 与 OpenAI 的接口兼容。以下是服务启动的示例：
 ```shell
+lmdeploy serve api_server OpenGVLab/InternVL2-2B --model-name InternVL2-2B --backend turbomind --server-port 23333 --chat-template chat_template.json
 ```
+为了使用 OpenAI 风格的 API 接口，您需要安装 `openai` Python 包：
 ```shell
+pip install openai
 ```
+然后，使用下面的代码进行 API 调用：
+```python
+from openai import OpenAI
+client = OpenAI(api_key='YOUR_API_KEY', base_url='http://0.0.0.0:23333/v1')
+model_name = client.models.list().data[0].id
+response = client.chat.completions.create(
+    model="InternVL2-2B",
+    messages=[{
+        'role':
+        'user',
+        'content': [{
+            'type': 'text',
+            'text': 'describe this image',
+        }, {
+            'type': 'image_url',
+            'image_url': {
+                'url':
+                'https://modelscope.oss-cn-beijing.aliyuncs.com/resource/tiger.jpeg',
+            },
+        }],
+    }],
+    temperature=0.8,
+    top_p=0.8)
+print(response)
+```
 ## 开源许可证

config.json CHANGED Viewed

@@ -91,7 +91,7 @@
     "tie_word_embeddings": false,
     "tokenizer_class": null,
     "top_k": 50,
-    "top_p": null,
     "torch_dtype": "bfloat16",
     "torchscript": false,
     "transformers_version": "4.37.2",

     "tie_word_embeddings": false,
     "tokenizer_class": null,
     "top_k": 50,
+    "top_p": 1.0,
     "torch_dtype": "bfloat16",
     "torchscript": false,
     "transformers_version": "4.37.2",

conversation.py CHANGED Viewed

@@ -330,13 +330,16 @@ def get_conv_template(name: str) -> Conversation:
     return conv_templates[name].copy()
-# Note that for inference, using the Hermes-2 and internlm2-chat templates is equivalent.
 register_conv_template(
     Conversation(
         name='Hermes-2',
         system_template='<|im_start|>system\n{system_message}',
         # note: The new system prompt was not used here to avoid changes in benchmark performance.
-        # system_message='我是书生·万象，英文名是InternVL，是由上海人工智能实验室及多家合作单位联合开发的多模态大语言模型。人工智能实验室致力于原始技术创新，开源开放，共享共创，推动科技进步和产业发展。',
         system_message='你是由上海人工智能实验室联合商汤科技开发的书生多模态大模型，英文名叫InternVL, 是一个有用无害的人工智能助手。',
         roles=('<|im_start|>user\n', '<|im_start|>assistant\n'),
         sep_style=SeparatorStyle.MPT,
@@ -357,7 +360,7 @@ register_conv_template(
         name='internlm2-chat',
         system_template='<|im_start|>system\n{system_message}',
         # note: The new system prompt was not used here to avoid changes in benchmark performance.
-        # system_message='我是书生·万象，英文名是InternVL，是由上海人工智能实验室及多家合作单位联合开发的多模态大语言模型。人工智能实验室致力于原始技术创新，开源开放，共享共创，推动科技进步和产业发展。',
         system_message='你是由上海人工智能实验室联合商汤科技开发的书生多模态大模型，英文名叫InternVL, 是一个有用无害的人工智能助手。',
         roles=('<|im_start|>user\n', '<|im_start|>assistant\n'),
         sep_style=SeparatorStyle.MPT,
@@ -376,7 +379,7 @@ register_conv_template(
         name='phi3-chat',
         system_template='<|system|>\n{system_message}',
         # note: The new system prompt was not used here to avoid changes in benchmark performance.
-        # system_message='我是书生·万象，英文名是InternVL，是由上海人工智能实验室及多家合作单位联合开发的多模态大语言模型。人工智能实验室致力于原始技术创新，开源开放，共享共创，推动科技进步和产业发展。',
         system_message='你是由上海人工智能实验室联合商汤科技开发的书生多模态大模型，英文名叫InternVL, 是一个有用无害的人工智能助手。',
         roles=('<|user|>\n', '<|assistant|>\n'),
         sep_style=SeparatorStyle.MPT,

     return conv_templates[name].copy()
+# Both Hermes-2 and internlm2-chat are chatml-format conversation templates. The difference
+# is that during training, the preprocessing function for the Hermes-2 template doesn't add
+# <s> at the beginning of the tokenized sequence, while the internlm2-chat template does.
+# Therefore, they are completely equivalent during inference.
 register_conv_template(
     Conversation(
         name='Hermes-2',
         system_template='<|im_start|>system\n{system_message}',
         # note: The new system prompt was not used here to avoid changes in benchmark performance.
+        # system_message='我是书生·万象，英文名是InternVL，是由上海人工智能实验室及多家合作单位联合开发的多模态大语言模型。',
         system_message='你是由上海人工智能实验室联合商汤科技开发的书生多模态大模型，英文名叫InternVL, 是一个有用无害的人工智能助手。',
         roles=('<|im_start|>user\n', '<|im_start|>assistant\n'),
         sep_style=SeparatorStyle.MPT,
         name='internlm2-chat',
         system_template='<|im_start|>system\n{system_message}',
         # note: The new system prompt was not used here to avoid changes in benchmark performance.
+        # system_message='我是书生·万象，英文名是InternVL，是由上海人工智能实验室及多家合作单位联合开发的多模态大语言模型。',
         system_message='你是由上海人工智能实验室联合商汤科技开发的书生多模态大模型，英文名叫InternVL, 是一个有用无害的人工智能助手。',
         roles=('<|im_start|>user\n', '<|im_start|>assistant\n'),
         sep_style=SeparatorStyle.MPT,
         name='phi3-chat',
         system_template='<|system|>\n{system_message}',
         # note: The new system prompt was not used here to avoid changes in benchmark performance.
+        # system_message='我是书生·万象，英文名是InternVL，是由上海人工智能实验室及多家合作单位联合开发的多模态大语言模型。',
         system_message='你是由上海人工智能实验室联合商汤科技开发的书生多模态大模型，英文名叫InternVL, 是一个有用无害的人工智能助手。',
         roles=('<|user|>\n', '<|assistant|>\n'),
         sep_style=SeparatorStyle.MPT,