error Attempted to assign 1 x 2430 = 2430 multimodal tokens to 2376 placeholders
#21
by
werther0223
- opened
Code as follows:
import base64
import mimetypes
import os
import os.path
from datetime import datetime, timedelta
from vllm import LLM
from vllm.sampling_params import SamplingParams
# Hugging Face repo id of the model (kept for reference; the local checkout
# below is what is actually loaded).
model_name = "mistralai/Pixtral-Large-Instruct-2411"
# Local directory containing the downloaded checkpoint and SYSTEM_PROMPT.txt.
local_path = '/home/Pixtral-Large-Instruct-2411/'
def load_system_prompt(repo_id: str, filename: str) -> str:
    """Read a system-prompt template and fill in its placeholders.

    The file is read from the module-level ``local_path`` directory (the
    ``hf_hub_download`` call is kept commented out for the offline case).

    Args:
        repo_id: Repo id or local directory; its last path component is
            substituted for ``{name}`` in the template.
        filename: Name of the prompt file inside ``local_path``.

    Returns:
        The template text with ``{name}``, ``{today}`` and ``{yesterday}``
        substituted.
    """
    # file_path = hf_hub_download(repo_id=repo_id, filename=filename)
    file_path = os.path.join(local_path, filename)
    with open(file_path, 'r') as file:
        system_prompt = file.read()
    today = datetime.today().strftime('%Y-%m-%d')
    yesterday = (datetime.today() - timedelta(days=1)).strftime('%Y-%m-%d')
    # Bug fix: this function is called with a directory path ending in '/',
    # for which repo_id.split("/")[-1] is the empty string. Normalize the
    # path first so the trailing separator is dropped and the real basename
    # ("Pixtral-Large-Instruct-2411") is used as the model name.
    model_name = os.path.basename(os.path.normpath(repo_id))
    return system_prompt.format(name=model_name, today=today, yesterday=yesterday)
def file_to_data_url(file_path: str) -> str:
    """Return the file's contents encoded as a base64 ``data:`` URL.

    Args:
        file_path: Path to the (image) file to embed.

    Returns:
        A ``data:<mime>;base64,<payload>`` URL string.
    """
    with open(file_path, "rb") as image_file:
        encoded_string = base64.b64encode(image_file.read()).decode('utf-8')
    # _, extension = os.path.splitext(file_path)
    # mime_type = f"image/{extension[1:].lower()}"
    mime_type, _ = mimetypes.guess_type(file_path)
    if mime_type is None:
        # Bug fix: guess_type() returns None for unrecognized extensions,
        # which previously produced the malformed URL "data:None;base64,...".
        mime_type = "application/octet-stream"
    return f"data:{mime_type};base64,{encoded_string}"
# Script body: build a multimodal chat request and run it through vLLM
# against the locally downloaded Pixtral-Large checkpoint.
SYSTEM_PROMPT = load_system_prompt(local_path, "SYSTEM_PROMPT.txt")

image_url = 'https://img-blog.csdnimg.cn/21a0effc21e24d958c38e46820a23049.png'
local_image_path = "/dfs/data/mistral/21a0effc21e24d958c38e46820a23049.png"

# Assemble the user turn from separate text and image parts; the image is
# embedded as a base64 data URL rather than fetched over the network.
_text_part = {
    "type": "text",
    "text": "Which of the depicted countries has the best food? Which the second and third and fourth? Name the country, its color on the map and one its city that is visible on the map, but is not the capital. Make absolutely sure to only name a city that can be seen on the map.",
    # "text": "describe image",
}
_image_part = {"type": "image_url", "image_url": {"url": file_to_data_url(local_image_path)}}
messages = [
    {"role": "system", "content": SYSTEM_PROMPT},
    {"role": "user", "content": [_text_part, _image_part]},
]

sampling_params = SamplingParams(max_tokens=4096)
llm = LLM(
    model=local_path,
    config_format="mistral",
    load_format="mistral",
    tokenizer_mode="mistral",
    tensor_parallel_size=8,
    limit_mm_per_prompt={"image": 4},
    device='cuda',
    allowed_local_media_path='/dfs/data/mistral/',
)
outputs = llm.chat(messages, sampling_params=sampling_params)
print(outputs[0].outputs[0].text)