import gradio as gr
import torch
from PIL import Image
from transformers import BlipForConditionalGeneration, BlipProcessor
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Both the model weights and the processor configuration are read from the
# current working directory ("./"), so the script must be started from the
# directory that contains them.
model = BlipForConditionalGeneration.from_pretrained("./").to(device)
processor = BlipProcessor.from_pretrained("./")
def generate_caption(image) -> str:
    """Generate a text caption for an image with the BLIP model.

    Args:
        image: The image as delivered by the Gradio ``"image"`` input. It is
            handed to the processor unchanged. ``None`` is treated as
            "no image supplied".

    Returns:
        The decoded caption with special tokens stripped, or an empty string
        when ``image`` is ``None``.
    """
    # The form can be submitted without an image; return an empty caption
    # instead of letting the processor fail on ``None``.
    if image is None:
        return ""

    # Preprocess into PyTorch tensors and move them to the model's device.
    inputs = processor(images=image, return_tensors="pt").to(device)

    # Inference only, so skip autograd bookkeeping.
    with torch.no_grad():
        generated_ids = model.generate(**inputs)

    # Only the first generated sequence is decoded (one image in, one out).
    return processor.decode(generated_ids[0], skip_special_tokens=True)
# Web UI: one image input wired to generate_caption, one text output.
interface = gr.Interface(fn=generate_caption, inputs="image", outputs="text")

# Start the Gradio server as soon as the script is executed.
interface.launch()