import gradio as gr
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration

MARKDOWN = """
# BLIP Image Captioning
# BLIP fine-tuned on chest X-ray images 🔥
"""

# Load the fine-tuned BLIP processor and captioning model
processor = BlipProcessor.from_pretrained("umarigan/blip-image-captioning-base-chestxray-finetuned")
model = BlipForConditionalGeneration.from_pretrained("umarigan/blip-image-captioning-base-chestxray-finetuned")

# Define the prediction function
def generate_caption(image: Image.Image) -> str:
    # Guard against the button being clicked before an image is provided
    if image is None:
        return "Please upload or select an image first."
    text = "a photography of"  # conditional prompt that BLIP completes into a caption
    inputs = processor(images=image, text=text, return_tensors="pt")
    out = model.generate(**inputs)
    caption = processor.decode(out[0], skip_special_tokens=True)
    return caption

# Example images from your Hugging Face Space (replace with actual file paths)
example_images = [
    "example1.jpg",
    "example2.jpg",
    "example3.jpg",
]

# Create the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown(MARKDOWN)

    # Image input component with example images
    with gr.Row():
        with gr.Column():
            image_input = gr.Image(type="pil", label="Upload an Image or Select an Example")
            gr.Examples(examples=example_images, inputs=image_input)
        with gr.Column():
            caption_output = gr.Textbox(label="Generated Caption")

    # Generate button wired to the captioning function
    generate_button = gr.Button("Generate Caption")
    generate_button.click(fn=generate_caption, inputs=image_input, outputs=caption_output)

# Launch the app
demo.launch()