# Hugging Face Spaces demo: BLIP-2 introspective monologue generator.
from gradio import Interface
from PIL import Image
from transformers import (
    Blip2ForConditionalGeneration,
    Blip2Processor,
    BlipForConditionalGeneration,
    BlipProcessor,
)
# Load the BLIP-2 model and processor.
# NOTE: "Salesforce/blip2-*" checkpoints are BLIP-2 models and must be loaded
# with the Blip2* classes; BlipProcessor / BlipForConditionalGeneration target
# the original BLIP architecture and fail on this checkpoint.
processor = Blip2Processor.from_pretrained("Salesforce/blip2-flan-t5-xl")
model = Blip2ForConditionalGeneration.from_pretrained("Salesforce/blip2-flan-t5-xl")
def generate_response(image, prompt, max_new_tokens=64):
    """Generate a text response from the model for an image and prompt.

    Args:
        image: Input image (PIL.Image or array form accepted by the
            processor — presumably what the Gradio "image" input yields;
            TODO confirm).
        prompt: Text prompt that conditions the generation.
        max_new_tokens: Upper bound on generated tokens. The library
            default (~20) truncates longer introspective responses, so a
            roomier default is exposed as a backward-compatible parameter.

    Returns:
        The decoded model output with special tokens removed.
    """
    # Keyword arguments make the (images, text) argument order explicit.
    inputs = processor(images=image, text=prompt, return_tensors="pt")
    outputs = model.generate(**inputs, max_new_tokens=max_new_tokens)
    return processor.decode(outputs[0], skip_special_tokens=True)
# Create a Gradio interface | |
def predict(image, prompt):
    """Gradio callback: delegate straight to the model-facing helper."""
    response = generate_response(image, prompt)
    return response
# Wire up the Gradio UI: an image upload plus a text prompt in, text out.
interface = Interface(
    title="BLIP-2: Introspective Monologue Generator",
    description="Upload an image and provide a prompt. The model will respond with introspective thoughts about the image.",
    fn=predict,
    inputs=["image", "text"],
    outputs="text",
)

if __name__ == "__main__":
    # Start the local web server only when executed as a script.
    interface.launch()