VikramSingh178 committed
Commit
88a381f
1 Parent(s): cca63d4

feat: Add YOLOv8s object detection model


Former-commit-id: 7181c5ce6b26b943a24ccc366026d4a88461c241

scripts/__pycache__/config.cpython-310.pyc CHANGED
Binary files a/scripts/__pycache__/config.cpython-310.pyc and b/scripts/__pycache__/config.cpython-310.pyc differ
 
scripts/config.py CHANGED
@@ -7,6 +7,7 @@ PROJECT_NAME = "Product Photography"
  PRODUCTS_10k_DATASET = "VikramSingh178/Products-10k-BLIP-captions"
  CAPTIONING_MODEL_NAME = "Salesforce/blip-image-captioning-base"
  SEGMENTATION_MODEL_NAME = "facebook/sam-vit-huge"
+ DETECTION_MODEL_NAME = "yolov8s"
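Note: scripts/utils.py below instantiates the detector via YOLO(DETECTION_MODEL_NAME). With a recent ultralytics release, the bare stem "yolov8s" should resolve to the pretrained yolov8s.pt checkpoint, downloaded automatically on first use. A minimal sketch of that call (the image path is a placeholder, not from this repo):

from ultralytics import YOLO

detector = YOLO("yolov8s")           # should resolve to yolov8s.pt (auto-downloaded)
results = detector("example1.jpg")   # placeholder path; paths, PIL images, or arrays are accepted
print(results[0].boxes.xyxy.tolist())  # [[x_min, y_min, x_max, y_max], ...]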
scripts/extended_image.png ADDED
scripts/mask.png ADDED
scripts/utils.py CHANGED
@@ -2,10 +2,8 @@ import torch
  from ultralytics import YOLO
  from transformers import SamModel, SamProcessor
  import numpy as np
- from PIL import Image
- from config import SEGMENTATION_MODEL_NAME
- import cv2
- import matplotlib.pyplot as plt
+ from PIL import Image, ImageOps
+ from config import SEGMENTATION_MODEL_NAME, DETECTION_MODEL_NAME

  def accelerator():
      """
@@ -21,7 +19,6 @@ def accelerator():
      else:
          return "cpu"

-
  class ImageAugmentation:
      """
      Class for centering an image on a white background using ROI.
@@ -32,119 +29,63 @@ class ImageAugmentation:
          roi_scale (float): Scale factor to determine the size of the region of interest (ROI) in the original image.
      """

-     def __init__(self, target_width, target_height, roi_scale=0.5):
-         """
-         Initialize ImageAugmentation class.
-
-         Args:
-             target_width (int): Desired width of the extended image.
-             target_height (int): Desired height of the extended image.
-             roi_scale (float): Scale factor to determine the size of the region of interest (ROI) in the original image.
-         """
+     def __init__(self, target_width, target_height, roi_scale=0.6):
          self.target_width = target_width
          self.target_height = target_height
          self.roi_scale = roi_scale

-     def extend_image(self, image_path):
-         """
-         Extends the given image to the specified target dimensions while maintaining the aspect ratio of the original image.
-         The image is centered based on the detected region of interest (ROI).
-
-         Args:
-             image_path (str): The path to the image file.
-
-         Returns:
-             PIL.Image.Image: The extended image with the specified dimensions.
-         """
-         # Open the original image
-         original_image = cv2.imread(image_path)
-
-         # Convert the image to grayscale for better edge detection
-         gray_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2GRAY)
-
-         # Perform edge detection to find contours
-         edges = cv2.Canny(gray_image, 50, 150)
-         contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-
-         # Find the largest contour (assumed to be the ROI)
-         largest_contour = max(contours, key=cv2.contourArea)
-
-         # Get the bounding box of the largest contour
-         x, y, w, h = cv2.boundingRect(largest_contour)
-
-         # Calculate the center of the bounding box
-         roi_center_x = x + w // 2
-         roi_center_y = y + h // 2
-
-         # Calculate the top-left coordinates of the ROI
-         roi_x = max(0, roi_center_x - self.target_width // 2)
-         roi_y = max(0, roi_center_y - self.target_height // 2)
-
-         # Crop the ROI from the original image
-         roi = original_image[roi_y:roi_y+self.target_height, roi_x:roi_x+self.target_width]
-
-         # Create a new white background image with the target dimensions
-         extended_image = np.ones((self.target_height, self.target_width, 3), dtype=np.uint8) * 255
-
-         # Calculate the paste position for centering the ROI
-         paste_x = (self.target_width - roi.shape[1]) // 2
-         paste_y = (self.target_height - roi.shape[0]) // 2
-
-         # Paste the ROI onto the white background
-         extended_image[paste_y:paste_y+roi.shape[0], paste_x:paste_x+roi.shape[1]] = roi
-
-         return Image.fromarray(cv2.cvtColor(extended_image, cv2.COLOR_BGR2RGB))
-
-     def generate_bbox(self, image):
-         """
-         Generate bounding box for the input image.
-
-         Args:
-             image: The input image.
-
-         Returns:
-             list: Bounding box coordinates [x_min, y_min, x_max, y_max].
-         """
-         model = YOLO("yolov8s.pt")
-         results = model(image)
-         bbox = results[0].boxes.xyxy.tolist()
-         return bbox
-
-     def generate_mask(self, image, bbox):
-         """
-         Generates masks for the given image using a segmentation model.
-
-         Args:
-             image: The input image for which masks need to be generated.
-             bbox: Bounding box coordinates [x_min, y_min, x_max, y_max].
-
-         Returns:
-             numpy.ndarray: The generated mask.
-         """
-         model = SamModel.from_pretrained(SEGMENTATION_MODEL_NAME).to(device=accelerator())
-         processor = SamProcessor.from_pretrained(SEGMENTATION_MODEL_NAME)
-
-         # Ensure bbox is in the correct format
-         bbox_list = [bbox]  # Convert bbox to list of lists
-
-         # Pass bbox as a list of lists to SamProcessor
-         inputs = processor(image, input_boxes=bbox_list, return_tensors="pt").to(device=accelerator())
-         with torch.no_grad():
-             outputs = model(**inputs)
-         masks = processor.image_processor.post_process_masks(
-             outputs.pred_masks,
-             inputs["original_sizes"],
-             inputs["reshaped_input_sizes"],
-         )
-
-         return masks[0].cpu().numpy()
+     def extend_image(self, image: Image) -> Image:
+         """
+         Extends an image to fit within the specified target dimensions while maintaining the aspect ratio.
+         """
+         original_width, original_height = image.size
+         scale = min(self.target_width / original_width, self.target_height / original_height)
+         new_width = int(original_width * scale * self.roi_scale)
+         new_height = int(original_height * scale * self.roi_scale)
+         resized_image = image.resize((new_width, new_height))
+         extended_image = Image.new("RGB", (self.target_width, self.target_height), "white")
+         paste_x = (self.target_width - new_width) // 2
+         paste_y = (self.target_height - new_height) // 2
+         extended_image.paste(resized_image, (paste_x, paste_y))
+         return extended_image
+
+     def generate_mask_from_bbox(self, image: Image) -> np.ndarray:
+         """
+         Generates a mask from the bounding box of an image using YOLO and SAM-ViT models.
+         """
+         yolo = YOLO(DETECTION_MODEL_NAME)
+         processor = SamProcessor.from_pretrained(SEGMENTATION_MODEL_NAME)
+         model = SamModel.from_pretrained(SEGMENTATION_MODEL_NAME).to(accelerator())
+
+         # Run YOLO detection
+         results = yolo(np.array(image))
+         bboxes = results[0].boxes.xyxy.tolist()
+         print(bboxes)
+
+         # Prepare inputs for SAM
+         inputs = processor(image, input_boxes=[bboxes], return_tensors="pt").to(device=accelerator())
+         with torch.no_grad():
+             outputs = model(**inputs)
+         masks = processor.image_processor.post_process_masks(outputs.pred_masks.cpu(), inputs["original_sizes"].cpu(), inputs["reshaped_input_sizes"].cpu())
+
+         return masks[0].numpy()
+
+     def invert_mask(self, mask_image: np.ndarray) -> np.ndarray:
+         """
+         Inverts the given mask image.
+         """
+         mask_image = (mask_image * 255).astype(np.uint8)
+         mask_pil = Image.fromarray(mask_image)
+         inverted_mask_pil = ImageOps.invert(mask_pil.convert("L"))
+         return inverted_mask_pil

  if __name__ == "__main__":
-     augmenter = ImageAugmentation(target_width=1920, target_height=1080, roi_scale=0.3)
+     augmenter = ImageAugmentation(target_width=1920, target_height=1080, roi_scale=0.6)
      image_path = "/home/product_diffusion_api/sample_data/example1.jpg"
-     extended_image = augmenter.extend_image(image_path)
-     bbox = augmenter.generate_bbox(extended_image)
-     mask = augmenter.generate_mask(extended_image, bbox)
-     plt.imsave('mask.jpg', mask)
-     #Image.fromarray(mask).save("centered_image_with_mask.jpg")
+     image = Image.open(image_path)
+     extended_image = augmenter.extend_image(image)
+     mask = augmenter.generate_mask_from_bbox(extended_image)
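For completeness, a usage sketch of the new pipeline, run from the scripts/ directory. It assumes generate_mask_from_bbox returns a boolean array of shape (num_boxes, num_masks, H, W) — the per-image output of SAM's post_process_masks — so a single 2D mask is picked out before saving or inverting. The input path and output file names are illustrative, chosen to echo the extended_image.png and mask.png artifacts added in this commit.

import numpy as np
from PIL import Image
from utils import ImageAugmentation

augmenter = ImageAugmentation(target_width=1920, target_height=1080, roi_scale=0.6)
image = Image.open("../sample_data/example1.jpg")  # placeholder path

extended_image = augmenter.extend_image(image)
extended_image.save("extended_image.png")

masks = augmenter.generate_mask_from_bbox(extended_image)
# Assumption: take the first mask of the first detected box; merging all
# masks instead (e.g. masks.any(axis=(0, 1))) would also be reasonable.
mask_2d = masks[0][0].astype(np.uint8)
Image.fromarray(mask_2d * 255).save("mask.png")

# invert_mask returns a PIL image (despite its ndarray annotation).
inverted = augmenter.invert_mask(mask_2d)
inverted.save("inverted_mask.png")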