# chiru-maddala's picture
# Duplicate from A-Celsius/Caption-Generator
# 937eb55
# raw
# history blame contribute delete
# No virus
# 1.41 kB
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration
import torch
import gradio as gr
# Checkpoint identifier shared by the processor and the model loaded below.
model_name = "Salesforce/blip-image-captioning-base"

# Run on the GPU when torch reports CUDA as available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the processor and the captioning model for the checkpoint, placing
# the model on the selected device as soon as it is loaded.
caption_processor = BlipProcessor.from_pretrained(model_name)
model = BlipForConditionalGeneration.from_pretrained(model_name).to(device)
def generate_captions(image, num_captions=5, size=(512, 512)):
    """Generate beam-search captions for an image with the loaded BLIP model.

    Args:
        image: PIL image to caption, or None when no image was supplied.
        num_captions: Number of captions to return. Converted with int()
            in case the caller (e.g. a UI slider) passes a float.
        size: (width, height) the image is resized to before it is handed
            to the processor.

    Returns:
        A list with one decoded caption string per returned sequence, or an
        empty list when image is None or num_captions is less than 1.
    """
    # Nothing to caption: return an empty result instead of failing on
    # attribute access against None.
    if image is None:
        return []

    num_captions = int(num_captions)
    if num_captions < 1:
        return []

    # Convert before resizing: Pillow always falls back to nearest-neighbour
    # resampling for palette ("P") and 1-bit images, so resizing first would
    # degrade those inputs before they reach the model.
    if image.mode != 'RGB':
        image = image.convert('RGB')
    image = image.resize(size)

    inputs = caption_processor(image, return_tensors='pt').to(device)
    caption_ids = model.generate(
        **inputs,
        max_length=30,
        # num_return_sequences may not exceed num_beams, so widen the beam
        # when more than five captions are requested.
        num_beams=max(5, num_captions),
        num_return_sequences=num_captions,
        temperature=1.0,
    )
    return [
        caption_processor.decode(ids, skip_special_tokens=True)
        for ids in caption_ids
    ]
from gradio.components import Image, Textbox,Slider
def _captions_as_text(image, num_captions):
    """Return the generated captions as one newline-separated string.

    generate_captions returns a list; handing that list straight to a
    Textbox output shows its Python repr (brackets and quotes included)
    rather than readable captions.
    """
    return "\n".join(generate_captions(image, num_captions))


# Components are referenced through the gr namespace so the name Image is
# not confused with the PIL Image import at the top of the file.
interface = gr.Interface(
    fn=_captions_as_text,
    inputs=[
        gr.Image(type="pil", label="Input Image"),
        gr.Slider(minimum=1, maximum=5, step=1, label="Number of Captions"),
    ],
    outputs=gr.Textbox(type="text", label="Captions"),
    title="Image Caption Generator",
    description="AI tool that creates captions based on the image provided by the user.",
)
interface.launch()