import gradio as gr
from transformers import BlipProcessor, BlipForConditionalGeneration
from PIL import Image


MARKDOWN = """
# BLIP Image Captioning
## BLIP fine-tuned on chest X-ray images 🔥
<div>
  <a href="https://github.com/UmarIgan/Machine-Learning/blob/master/examples/image_captioning_blip.ipynb">
    <img src="https://badges.aleen42.com/src/github.svg" alt="GitHub" style="display:inline-block; width: 100px;">
  </a>
</div>
"""

# Load the model and processor
processor = BlipProcessor.from_pretrained("umarigan/blip-image-captioning-base-chestxray-finetuned")
model = BlipForConditionalGeneration.from_pretrained("umarigan/blip-image-captioning-base-chestxray-finetuned")
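
# Optional sketch: run inference on GPU when one is available (CPU otherwise).
# torch is already pulled in by transformers, so this adds no new dependency.
import torch

device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)
model.eval()  # inference only: disables dropout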

# Define the prediction function
def generate_caption(image):
    if image is None:
        return "Please upload or select an image first."
    # "a photography of" is the conditioning prefix from the standard BLIP
    # captioning example; the model completes it into a full caption.
    text = "a photography of"
    inputs = processor(image, text, return_tensors="pt").to(model.device)
    out = model.generate(**inputs, max_new_tokens=50)  # cap caption length
    caption = processor.decode(out[0], skip_special_tokens=True)
    return caption
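
# Quick local sanity check (hypothetical file name, not part of the app flow):
#   print(generate_caption(Image.open("chest_xray_sample.png")))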

# Example images bundled with the Space (replace with actual file paths)
example_images = [
    "example1.jpg",
    "example2.jpg",
    "example3.jpg"
]
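
# gr.Examples resolves these paths relative to the app's working directory;
# if a file is missing, Gradio may warn or fail when the app starts.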

# Create the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown(MARKDOWN)

    # Image input component with example images
    with gr.Row():
        with gr.Column():
            image_input = gr.Image(type="pil", label="Upload an Image or Select an Example")
            gr.Examples(examples=example_images, inputs=image_input)

        with gr.Column():
            caption_output = gr.Textbox(label="Generated Caption")

    # Generate button
    generate_button = gr.Button("Generate Caption")
    generate_button.click(fn=generate_caption, inputs=image_input, outputs=caption_output)

# Launch the app
demo.launch()
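
# launch() serves on localhost (or on the Space's own host when deployed).
# Locally, demo.launch(share=True) would also create a temporary public
# gradio.live URL for sharing the demo.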