Spaces:

marcellohalfeld
/

ibm_1

Sleeping

ibm_1 / app.py

Update app.py

f343ed3 verified 7 months ago

1.42 kB

	import gradio as gr
	import numpy as np
	from PIL import Image
	from transformers import AutoProcessor, BlipForConditionalGeneration

	# HuggingFace
	# Load model directly
	from transformers import AutoProcessor, AutoModelForSeq2SeqLM

	# Checkpoint shared by the processor and the captioning model.
	# BLIP is not registered under AutoModelForSeq2SeqLM, so the dedicated
	# BlipForConditionalGeneration class (imported above) is used to load it.
	processor = AutoProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
	model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

	def caption_image(input_image: np.ndarray) -> str:
	    """Generate a text caption for an image.

	    The array is converted to an RGB PIL image, encoded together with the
	    prompt prefix "the image of" by the module-level ``processor``, passed
	    to the module-level ``model`` for generation (at most 50 tokens), and
	    the first generated sequence is decoded back to text.

	    Args:
	        input_image: Image pixels as a NumPy array, as delivered by the
	            ``gr.Image`` input component (presumably height x width x
	            channels; confirm against the Gradio version in use).

	    Returns:
	        The decoded caption with special tokens stripped.

	    Raises:
	        gr.Error: If no image was provided (``input_image`` is ``None``).
	    """
	    # Gradio hands over None when the user submits without an image;
	    # surface a readable message instead of a traceback from PIL.
	    if input_image is None:
	        raise gr.Error("Please provide an image to caption.")

	    # Convert numpy array to PIL Image and normalise to RGB.
	    raw_image = Image.fromarray(input_image).convert('RGB')

	    # Encode the image with a short prompt prefix; no question is needed
	    # for plain captioning.
	    text = "the image of"
	    inputs = processor(images=raw_image, text=text, return_tensors="pt")

	    # Generate a caption for the image.
	    outputs = model.generate(**inputs, max_length=50)

	    # Decode the generated tokens to text.
	    caption = processor.decode(outputs[0], skip_special_tokens=True)
	    return caption

	# Web UI: a single image input wired to caption_image, with the caption
	# shown as plain text.
	iface = gr.Interface(
	    title="Image Captioning",
	    description="This is a simple web app for generating captions for images using a trained model.",
	    fn=caption_image,
	    inputs=gr.Image(),
	    outputs="text",
	)