Vikramjeet Singh committed
Commit a2d3846
Parents: db442f4, cca63d4

Merge pull request #23 from VikramxD/v2


V2

Former-commit-id: d20008671d5a970a27dc24fbd7ae04bc2c8b378b

models/yolov8s.pt.REMOVED.git-id ADDED
@@ -0,0 +1 @@
+5f7efb1ee991ebccb1ee9a360066829e6435a168
run.sh CHANGED
@@ -1,2 +1,3 @@
 apt-get update && apt-get install python3-dev
 pip install -r requirements.txt
+apt install libgl1-mesa-glx
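
Note: the added line is presumably needed because OpenCV's prebuilt wheels link against libGL, which slim Debian/Ubuntu images lack; without it, the cv2 import introduced in scripts/utils.py below fails at import time. A minimal sanity check, assuming opencv-python is installed via requirements.txt:

    # Fails with "ImportError: libGL.so.1: cannot open shared object file"
    # until libgl1-mesa-glx (or an equivalent libGL provider) is installed.
    import cv2
    print(cv2.__version__)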
scripts/__pycache__/config.cpython-310.pyc CHANGED
Binary files a/scripts/__pycache__/config.cpython-310.pyc and b/scripts/__pycache__/config.cpython-310.pyc differ
 
scripts/__pycache__/config.cpython-312.pyc ADDED
Binary file (3.22 kB).
 
scripts/utils.py CHANGED
@@ -1,10 +1,11 @@
 import torch
 from ultralytics import YOLO
-from transformers import SamModel,SamProcessor
+from transformers import SamModel, SamProcessor
 import numpy as np
-from PIL import Image
+from PIL import Image
 from config import SEGMENTATION_MODEL_NAME
-
+import cv2
+import matplotlib.pyplot as plt
 
 def accelerator():
     """
@@ -14,13 +15,11 @@ def accelerator():
         str: The name of the device accelerator ('cuda', 'mps', or 'cpu').
     """
     if torch.cuda.is_available():
-        device = 'cuda'
+        return "cuda"
     elif torch.backends.mps.is_available():
-        device = 'mps'
+        return "mps"
     else:
-        device = 'cpu'
-    return device
-
+        return "cpu"
 
 
 class ImageAugmentation:
@@ -28,84 +27,124 @@ class ImageAugmentation:
     Class for centering an image on a white background using ROI.
 
     Attributes:
-        background_size (tuple): Size of the larger background where the image will be placed.
+        target_width (int): Desired width of the extended image.
+        target_height (int): Desired height of the extended image.
+        roi_scale (float): Scale factor to determine the size of the region of interest (ROI) in the original image.
     """
 
-    def __init__(self, background_size=(1920, 1080)):
+    def __init__(self, target_width, target_height, roi_scale=0.5):
         """
         Initialize ImageAugmentation class.
 
         Args:
-            background_size (tuple, optional): Size of the larger background. Default is (1920, 1080).
+            target_width (int): Desired width of the extended image.
+            target_height (int): Desired height of the extended image.
+            roi_scale (float): Scale factor to determine the size of the region of interest (ROI) in the original image.
         """
-        self.background_size = background_size
+        self.target_width = target_width
+        self.target_height = target_height
+        self.roi_scale = roi_scale
 
-    def center_image_on_background(self, image, roi):
+    def extend_image(self, image_path):
         """
-        Center the input image on a larger background using ROI.
+        Extends the given image to the specified target dimensions while maintaining the aspect ratio of the original image.
+        The image is centered based on the detected region of interest (ROI).
 
         Args:
-            image (numpy.ndarray): Input image.
-            roi (tuple): Coordinates of the region of interest (x, y, width, height).
+            image_path (str): The path to the image file.
 
         Returns:
-            numpy.ndarray: Image centered on a larger background.
+            PIL.Image.Image: The extended image with the specified dimensions.
         """
-        w, h = self.background_size
-        bg = np.ones((h, w, 3), dtype=np.uint8) * 255  # White background
-        x, y, roi_w, roi_h = roi
-        bg[(h - roi_h) // 2:(h - roi_h) // 2 + roi_h, (w - roi_w) // 2:(w - roi_w) // 2 + roi_w] = image
-        return bg
+        # Open the original image
+        original_image = cv2.imread(image_path)
+
+        # Convert the image to grayscale for better edge detection
+        gray_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2GRAY)
+
+        # Perform edge detection to find contours
+        edges = cv2.Canny(gray_image, 50, 150)
+        contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+
+        # Find the largest contour (assumed to be the ROI)
+        largest_contour = max(contours, key=cv2.contourArea)
+
+        # Get the bounding box of the largest contour
+        x, y, w, h = cv2.boundingRect(largest_contour)
+
+        # Calculate the center of the bounding box
+        roi_center_x = x + w // 2
+        roi_center_y = y + h // 2
+
+        # Calculate the top-left coordinates of the ROI
+        roi_x = max(0, roi_center_x - self.target_width // 2)
+        roi_y = max(0, roi_center_y - self.target_height // 2)
+
+        # Crop the ROI from the original image
+        roi = original_image[roi_y:roi_y+self.target_height, roi_x:roi_x+self.target_width]
+
+        # Create a new white background image with the target dimensions
+        extended_image = np.ones((self.target_height, self.target_width, 3), dtype=np.uint8) * 255
+
+        # Calculate the paste position for centering the ROI
+        paste_x = (self.target_width - roi.shape[1]) // 2
+        paste_y = (self.target_height - roi.shape[0]) // 2
+
+        # Paste the ROI onto the white background
+        extended_image[paste_y:paste_y+roi.shape[0], paste_x:paste_x+roi.shape[1]] = roi
+
+        return Image.fromarray(cv2.cvtColor(extended_image, cv2.COLOR_BGR2RGB))
+
 
-    def detect_region_of_interest(self, image):
+    def generate_bbox(self, image):
         """
-        Detect the region of interest in the input image.
+        Generate bounding box for the input image.
 
         Args:
-            image (numpy.ndarray): Input image.
+            image: The input image.
 
         Returns:
-            tuple: Coordinates of the region of interest (x, y, width, height).
+            list: Bounding box coordinates [x_min, y_min, x_max, y_max].
         """
-        # Convert image to grayscale
-        grayscale_image = np.array(Image.fromarray(image).convert("L"))
-
-        # Calculate bounding box of non-zero region
-        bbox = Image.fromarray(grayscale_image).getbbox()
+        model = YOLO("yolov8s.pt")
+        results = model(image)
+        bbox = results[0].boxes.xyxy.tolist()
         return bbox
 
-    def generate_bbox(image):
-        """
-        Generate bounding box for the input image.
-
-        Args:
-            image_path (str): Path to the input image.
-
-        Returns:
-            tuple: Bounding box coordinates (x, y, width, height).
-        """
-        # Load YOLOv5 model
-        model = YOLO("yolov8s.pt")
-        results = model(image)
-        # Get bounding box coordinates
-        bbox = results[0].boxes.xyxy.int().tolist()
-        return bbox
+    def generate_mask(self, image, bbox):
+        """
+        Generates masks for the given image using a segmentation model.
 
-    def generate_mask():
-        model = SamModel.from_pretrained("SEGMENTATION_MODEL_NAMEz")
-        processor = SamProcessor.from_pretrained("SEGMENTATION_MODEL_NAME")
-
-
+        Args:
+            image: The input image for which masks need to be generated.
+            bbox: Bounding box coordinates [x_min, y_min, x_max, y_max].
 
+        Returns:
+            numpy.ndarray: The generated mask.
+        """
+        model = SamModel.from_pretrained(SEGMENTATION_MODEL_NAME).to(device=accelerator())
+        processor = SamProcessor.from_pretrained(SEGMENTATION_MODEL_NAME)
+
+        # Ensure bbox is in the correct format
+        bbox_list = [bbox]  # Convert bbox to list of lists
+
+        # Pass bbox as a list of lists to SamProcessor
+        inputs = processor(image, input_boxes=bbox_list, return_tensors="pt").to(device=accelerator())
+        with torch.no_grad():
+            outputs = model(**inputs)
+        masks = processor.image_processor.post_process_masks(
            outputs.pred_masks,
            inputs["original_sizes"],
            inputs["reshaped_input_sizes"],
        )
+
+        return masks[0].cpu().numpy()
 
 if __name__ == "__main__":
-    augmenter = ImageAugmentation()
-    image_path = "/Users/vikram/Python/product_diffusion_api/sample_data/example1.jpg"
-    image = np.array(Image.open(image_path).convert("RGB"))
-    roi = augmenter.detect_region_of_interest(image)
-    centered_image = augmenter.center_image_on_background(image, roi)
-    bbox = generate_bbox(centered_image)
-    print(bbox)
-
-
-
+    augmenter = ImageAugmentation(target_width=1920, target_height=1080, roi_scale=0.3)
+    image_path = "/home/product_diffusion_api/sample_data/example1.jpg"
+    extended_image = augmenter.extend_image(image_path)
+    bbox = augmenter.generate_bbox(extended_image)
+    mask = augmenter.generate_mask(extended_image, bbox)
+    plt.imsave('mask.jpg', mask)
+    #Image.fromarray(mask).save("centered_image_with_mask.jpg")
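
Note: SamProcessor's post_process_masks typically returns one boolean tensor per image with shape (num_boxes, num_masks_per_box, H, W), so the 4-D array that generate_mask hands to plt.imsave in the __main__ block is likely to be rejected (imsave accepts only 2-D or (H, W, 3/4) arrays). A hedged usage sketch that collapses the output to a single 2-D mask first; the import path, sample image, and first-box/first-mask choice are illustrative assumptions, not part of this commit:

    import numpy as np
    import matplotlib.pyplot as plt
    from utils import ImageAugmentation  # assumed import path for scripts/utils.py

    augmenter = ImageAugmentation(target_width=1920, target_height=1080, roi_scale=0.3)
    extended_image = augmenter.extend_image("sample_data/example1.jpg")  # hypothetical path
    bbox = augmenter.generate_bbox(extended_image)
    mask = augmenter.generate_mask(extended_image, bbox)  # 4-D: (boxes, masks, H, W)

    # Keep the first mask of the first box and convert to 0-255 grayscale
    # so plt.imsave accepts it.
    mask_2d = np.asarray(mask)[0, 0].astype(np.uint8) * 255
    plt.imsave("mask.jpg", mask_2d, cmap="gray")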