|
|
|
import torch |
|
from transformers import AutoModel, AutoTokenizer |
|
import spaces |
|
|
|
|
|
|
|
# Device the model weights are moved to after loading.
device = "cuda"

# Hub repository id shared by the model and tokenizer loads below.
_MODEL_ID = "openbmb/MiniCPM-V-2_6"

# Load the model in bfloat16 with SDPA attention, move it to `device`, and
# switch it to evaluation mode. trust_remote_code=True lets the code shipped
# in the model repository run locally.
model = (
    AutoModel.from_pretrained(
        _MODEL_ID,
        trust_remote_code=True,
        attn_implementation="sdpa",
        torch_dtype=torch.bfloat16,
    )
    .to(device=device)
    .eval()
)

# Tokenizer from the same repository as the model.
tokenizer = AutoTokenizer.from_pretrained(_MODEL_ID, trust_remote_code=True)
|
|
|
|
|
@spaces.GPU(duration=120)
def answer_question(image, question):
    """Answer a question about an image using the module-level model.

    The image and question are sent together as a single user message to
    ``model.chat``; the streamed output pieces are concatenated into one
    string before returning.

    Args:
        image: The image to analyze. It is placed unchanged into the message
            content. NOTE(review): earlier documentation described this as a
            path to an image file (str) -- confirm what type the caller
            actually passes and what ``model.chat`` expects.
        question (str): The question text.

    Returns:
        str: The generated answer to the question.
    """
    system_prompt = "You are an AI assistant specialized in visual content analysis. Given an image and a related question, analyze the image thoroughly and provide a precise and informative answer based on the visible content. Ensure your response is clear, accurate, and directly addresses the question."

    # Single-turn conversation: the image travels inside the message content,
    # so the separate `image` argument of `model.chat` is left as None.
    conversation = [{"role": "user", "content": [image, question]}]

    # With stream=True the result is consumed as an iterable of text pieces.
    pieces = model.chat(
        image=None,
        msgs=conversation,
        tokenizer=tokenizer,
        sampling=True,
        temperature=0.7,
        stream=True,
        system_prompt=system_prompt,
    )

    return "".join(pieces)
|
|