Daryl Lim committed on
Commit
e16c83b
·
1 Parent(s): 58c7226

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -26
app.py CHANGED
@@ -1,60 +1,81 @@
1
  """
2
- This module provides an interface for image captioning using the BLIP model.
3
  The interface allows users to upload an image and receive a caption.
4
  """
5
 
6
  import gradio as gr
7
  import spaces
8
- from transformers import BlipProcessor, BlipForConditionalGeneration
9
  from PIL import Image
10
 
 
 
 
 
 
 
11
  # Initialize the processor and model
12
- processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
13
- model = (
14
- BlipForConditionalGeneration
15
- .from_pretrained("Salesforce/blip-image-captioning-base")
16
- .to("cuda")
17
- )
 
 
 
 
18
 
19
- def generate_caption(image: Image) -> str:
20
  """
21
- Generates a caption for a given image using the BLIP model.
22
 
23
  Args:
24
- image (Image): The input image as a PIL Image object.
25
 
26
  Returns:
27
- str: The generated caption.
28
  """
29
- inputs = processor(images=image, return_tensors="pt").to("cuda")
30
- outputs = model.generate(**inputs)
31
- caption = processor.decode(outputs[0], skip_special_tokens=True)
32
- return caption
 
 
 
 
 
 
33
 
34
  @spaces.GPU
35
- def caption_image(image: Image) -> str:
36
  """
37
  Takes a PIL Image input and returns a caption.
38
 
39
  Args:
40
- image (Image): The input image as a PIL Image object.
41
 
42
  Returns:
43
- str: The generated caption or an error message.
44
  """
45
  try:
46
- return generate_caption(image)
47
- except Exception as e:
48
- return f"An error occurred: {str(e)}"
 
 
 
 
 
49
 
50
- # Define the Gradio interface
51
  demo = gr.Interface(
52
  fn=caption_image,
53
- inputs=gr.Image(type="pil"),
54
- outputs="text",
55
  title="Image Captioning with BLIP",
56
  description="Upload an image to generate a caption."
57
  )
58
 
59
- # Launch the interface
60
  demo.launch()
 
1
  """
2
+ This module provides an interface for image captioning using the BLIP-2 model.
3
  The interface allows users to upload an image and receive a caption.
4
  """
5
 
6
import gradio as gr
import spaces
import torch
from PIL import Image
from transformers import (
    BitsAndBytesConfig,
    Blip2ForConditionalGeneration,
    Blip2Processor,
    BlipForConditionalGeneration,
    BlipProcessor,
)
10
 
11
# Select the compute device: use the GPU when CUDA is available.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# 8-bit weight quantization (bitsandbytes) to keep the large model in memory.
quantization_config = BitsAndBytesConfig(load_in_8bit=True)
16
+
17
# Initialize the processor and model.
# NOTE: with load_in_8bit=True and device_map="auto", accelerate/bitsandbytes
# place the quantized weights themselves; calling .to(device) on a quantized
# model is unsupported (transformers raises on it), so it is deliberately
# omitted here.
try:
    processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-6.7b-coco")
    model = Blip2ForConditionalGeneration.from_pretrained(
        "Salesforce/blip2-opt-6.7b-coco",
        quantization_config=quantization_config,  # Quantize weights to 8-bit
        device_map="auto",  # Let accelerate handle device placement
        torch_dtype=torch.float16,  # Non-quantized modules in fp16 to save memory
    )
except Exception as error:
    # Swallowing the failure would only defer it to a NameError on the first
    # caption request; report the cause and fail fast instead.
    print(f"Error initializing model: {error}")
    raise
28
 
29
def generate_caption(image: Image.Image) -> str:
    """
    Generate a caption for an image with the BLIP-2 model.

    Args:
        image (PIL.Image): The image to describe.

    Returns:
        str: The decoded caption, or an error message if generation fails.

    Raises:
        ValueError: If *image* is not a PIL Image instance.
    """
    if not isinstance(image, Image.Image):
        raise ValueError("Input must be a PIL Image.")

    try:
        # Preprocess to tensors and move them to the same device as the model.
        model_inputs = processor(images=image, return_tensors="pt").to(device)
        generated_ids = model.generate(**model_inputs)
        return processor.decode(generated_ids[0], skip_special_tokens=True)
    except Exception as error:
        return f"Error generating caption: {str(error)}"
49
 
50
@spaces.GPU
def caption_image(image: Image.Image) -> str:
    """
    Takes a PIL Image input and returns a caption.

    Runs on a GPU-allocated worker via the @spaces.GPU decorator.

    Args:
        image (PIL.Image): The image to caption.

    Returns:
        str: The generated caption, or an error message if something goes wrong.
    """
    try:
        return generate_caption(image)
    except Exception as error:
        return f"An error occurred: {str(error)}"
66
+
67
# Gradio component type constants.
IMAGE_TYPE = "pil"  # deliver uploads as PIL Images
OUTPUT_TYPE = "text"

# Assemble the image-captioning UI around caption_image.
interface_kwargs = dict(
    fn=caption_image,
    inputs=gr.Image(type=IMAGE_TYPE),
    outputs=OUTPUT_TYPE,
    title="Image Captioning with BLIP",
    description="Upload an image to generate a caption.",
)
demo = gr.Interface(**interface_kwargs)

# Start the web app.
demo.launch()