|
|
|
from transformers import BlipProcessor, BlipForQuestionAnswering |
|
|
|
|
|
# Load the BLIP VQA processor once at import time; it handles both image
# preprocessing and question tokenization. Downloads/caches weights from the
# HuggingFace Hub on first use.
processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
# Matching BLIP model fine-tuned for visual question answering.
# NOTE(review): loaded eagerly as a module-level global (used by
# answer_question below); runs on CPU unless moved to a device by the caller.
model = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-base")
|
|
|
|
|
|
|
def answer_question(image, text):
    """
    Answer a natural-language question about an image using the BLIP VQA model.

    Args:
        image: The image to ask about. Passed directly to the BLIP processor,
            which expects a PIL.Image, NumPy array, or tensor — not a file
            path. (NOTE(review): the previous docstring said "path to the
            image file"; the code never opens a file — confirm callers pass a
            decoded image.)
        text (str): The question to answer.

    Returns:
        str: The model's generated answer, with special tokens removed.
    """
    # Preprocess image + question into model-ready tensors.
    encoding = processor(images=image, text=text, return_tensors="pt")

    # Autoregressively generate the answer tokens (capped at 50 tokens total).
    output_ids = model.generate(**encoding, max_length=50)

    # Decode the single generated sequence back to plain text.
    return processor.decode(output_ids[0], skip_special_tokens=True)
|
|