Martin Tomov committed on
Commit
9f37f40
•
1 Parent(s): b8429de

Fix broken runtime

Files changed (1)
  1. app.py +201 -0
app.py ADDED
@@ -0,0 +1,201 @@
+ import os
+ os.system('pip install gradio==4.29.0')  # as gradio==4.29.0 doesn't work in requirements.txt
+
+ import random
+ from dataclasses import dataclass
+ from typing import Any, List, Dict, Optional, Union, Tuple
+ import cv2
+ import torch
+ import requests
+ import numpy as np
+ from PIL import Image
+ import matplotlib.pyplot as plt
+ from transformers import AutoModelForMaskGeneration, AutoProcessor, pipeline
+ import gradio as gr
+ import spaces
+
+ @dataclass
+ class BoundingBox:
+     xmin: int
+     ymin: int
+     xmax: int
+     ymax: int
+
+     @property
+     def xyxy(self) -> List[float]:
+         return [self.xmin, self.ymin, self.xmax, self.ymax]
+
+ @dataclass
+ class DetectionResult:
+     score: float
+     label: str
+     box: BoundingBox
+     mask: Optional[np.ndarray] = None
+
+     @classmethod
+     def from_dict(cls, detection_dict: Dict) -> 'DetectionResult':
+         return cls(
+             score=detection_dict['score'],
+             label=detection_dict['label'],
+             box=BoundingBox(
+                 xmin=detection_dict['box']['xmin'],
+                 ymin=detection_dict['box']['ymin'],
+                 xmax=detection_dict['box']['xmax'],
+                 ymax=detection_dict['box']['ymax']
+             )
+         )
+
+ def annotate(image: Union[Image.Image, np.ndarray], detection_results: List[DetectionResult]) -> np.ndarray:
+     image_cv2 = np.array(image) if isinstance(image, Image.Image) else image
+     image_cv2 = cv2.cvtColor(image_cv2, cv2.COLOR_RGB2BGR)
+
+     for detection in detection_results:
+         label = detection.label
+         score = detection.score
+         box = detection.box
+         mask = detection.mask
+         color = np.random.randint(0, 256, size=3).tolist()  # random color per detection
+
+         cv2.rectangle(image_cv2, (box.xmin, box.ymin), (box.xmax, box.ymax), color, 2)
+         cv2.putText(image_cv2, f'{label}: {score:.2f}', (box.xmin, box.ymin - 10),
+                     cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
+
+         if mask is not None:
+             mask_uint8 = (mask * 255).astype(np.uint8)
+             contours, _ = cv2.findContours(mask_uint8, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+             cv2.drawContours(image_cv2, contours, -1, color, 2)
+
+     return cv2.cvtColor(image_cv2, cv2.COLOR_BGR2RGB)
+
+ def plot_detections(image: Union[Image.Image, np.ndarray], detections: List[DetectionResult]) -> np.ndarray:
+     annotated_image = annotate(image, detections)
+     return annotated_image
+
+ def load_image(image: Union[str, Image.Image]) -> Image.Image:
+     if isinstance(image, str) and image.startswith("http"):
+         image = Image.open(requests.get(image, stream=True).raw).convert("RGB")
+     elif isinstance(image, str):
+         image = Image.open(image).convert("RGB")
+     else:
+         image = image.convert("RGB")
+     return image
+
+ def get_boxes(detection_results: List[DetectionResult]) -> List[List[List[float]]]:
+     boxes = []
+     for result in detection_results:
+         xyxy = result.box.xyxy
+         boxes.append(xyxy)
+     return [boxes]  # the SAM processor expects one list of boxes per image
+
+ def mask_to_polygon(mask: np.ndarray) -> np.ndarray:
+     contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+     if len(contours) == 0:
+         return np.array([])
+     largest_contour = max(contours, key=cv2.contourArea)
+     return largest_contour
+
+ def refine_masks(masks: torch.BoolTensor, polygon_refinement: bool = False) -> List[np.ndarray]:
+     masks = masks.cpu().float().permute(0, 2, 3, 1).mean(axis=-1).numpy()
+     masks = (masks > 0).astype(np.uint8)  # threshold before the uint8 cast so fractional channel means aren't truncated to 0
+     if polygon_refinement:
+         for idx, mask in enumerate(masks):
+             shape = mask.shape
+             polygon = mask_to_polygon(mask)
+             masks[idx] = cv2.fillPoly(np.zeros(shape, dtype=np.uint8), [polygon], 1)
+     return list(masks)
+
+ @spaces.GPU
+ def detect(image: Image.Image, labels: List[str], threshold: float = 0.3, detector_id: Optional[str] = None) -> List[Dict[str, Any]]:
+     detector_id = detector_id if detector_id else "IDEA-Research/grounding-dino-base"
+     object_detector = pipeline(model=detector_id, task="zero-shot-object-detection", device="cuda")
+     labels = [label if label.endswith(".") else label + "." for label in labels]  # Grounding DINO expects "."-terminated prompts
+     results = object_detector(image, candidate_labels=labels, threshold=threshold)
+     return [DetectionResult.from_dict(result) for result in results]
+
+ @spaces.GPU
+ def segment(image: Image.Image, detection_results: List[DetectionResult], polygon_refinement: bool = False, segmenter_id: Optional[str] = None) -> List[DetectionResult]:
+     segmenter_id = segmenter_id if segmenter_id else "martintmv/InsectSAM"
+     segmentator = AutoModelForMaskGeneration.from_pretrained(segmenter_id).to("cuda")
+     processor = AutoProcessor.from_pretrained(segmenter_id)
+     boxes = get_boxes(detection_results)
+     inputs = processor(images=image, input_boxes=boxes, return_tensors="pt").to("cuda")
+     outputs = segmentator(**inputs)
+     masks = processor.post_process_masks(masks=outputs.pred_masks, original_sizes=inputs.original_sizes, reshaped_input_sizes=inputs.reshaped_input_sizes)[0]
+     masks = refine_masks(masks, polygon_refinement)
+     for detection_result, mask in zip(detection_results, masks):
+         detection_result.mask = mask
+     return detection_results
+
+ def grounded_segmentation(image: Union[Image.Image, str], labels: List[str], threshold: float = 0.3, polygon_refinement: bool = False, detector_id: Optional[str] = None, segmenter_id: Optional[str] = None) -> Tuple[np.ndarray, List[DetectionResult]]:
+     image = load_image(image)
+     detections = detect(image, labels, threshold, detector_id)
+     detections = segment(image, detections, polygon_refinement, segmenter_id)
+     return np.array(image), detections
+
+ def mask_to_min_max(mask: np.ndarray) -> Tuple[int, int, int, int]:
+     y, x = np.where(mask)
+     return x.min(), y.min(), x.max(), y.max()
+
+ def extract_and_paste_insect(original_image: np.ndarray, detection: DetectionResult, background: np.ndarray) -> None:
+     mask = detection.mask
+     xmin, ymin, xmax, ymax = mask_to_min_max(mask)
+     insect_crop = original_image[ymin:ymax, xmin:xmax]
+     mask_crop = mask[ymin:ymax, xmin:xmax]
+
+     # Ensure that we keep the original colors of the insect
+     insect = cv2.bitwise_and(insect_crop, insect_crop, mask=mask_crop)
+
+     x_offset, y_offset = xmin, ymin
+     x_end, y_end = x_offset + insect.shape[1], y_offset + insect.shape[0]
+
+     # Place the insect onto the yellow background
+     background[y_offset:y_end, x_offset:x_end] = insect
+
+ def create_yellow_background_with_insects(image: np.ndarray, detections: List[DetectionResult]) -> np.ndarray:
+     yellow_background = np.full((image.shape[0], image.shape[1], 3), (0, 255, 255), dtype=np.uint8)
+     for detection in detections:
+         if detection.mask is not None:
+             extract_and_paste_insect(image, detection, yellow_background)
+     return yellow_background
+
+ def draw_classification_boxes(image_with_insects, detections):
+     for detection in detections:
+         label = detection.label
+         score = detection.score
+         box = detection.box
+         color = (0, 255, 255)  # Yellow color for bounding box
+
+         cv2.rectangle(image_with_insects, (box.xmin, box.ymin), (box.xmax, box.ymax), color, 2)
+         (text_width, text_height), baseline = cv2.getTextSize(f"{label}: {score:.2f}", cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)
+         cv2.rectangle(
+             image_with_insects,
+             (box.xmin, box.ymin - text_height - baseline),
+             (box.xmin + text_width, box.ymin),
+             color,
+             thickness=cv2.FILLED
+         )
+         cv2.putText(
+             image_with_insects,
+             f"{label}: {score:.2f}",
+             (box.xmin, box.ymin - baseline),
+             cv2.FONT_HERSHEY_SIMPLEX,
+             0.5,
+             (255, 255, 255),
+             2
+         )
+     return image_with_insects
+
+ def process_image(image):
+     labels = ["insect"]
+     original_image, detections = grounded_segmentation(image, labels, threshold=0.3, polygon_refinement=True)
+     annotated_image = plot_detections(original_image, detections)
+     yellow_background_with_insects = create_yellow_background_with_insects(np.array(original_image), detections)
+     yellow_background_with_boxes = draw_classification_boxes(yellow_background_with_insects.copy(), detections)
+     return annotated_image, yellow_background_with_boxes
+
+ gr.Interface(
+     fn=process_image,
+     inputs=gr.Image(type="pil"),
+     outputs=[gr.Image(type="numpy"), gr.Image(type="numpy")],
+     title="🐞 InsectSAM + GroundingDINO Inference",
+ ).launch()
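
For reference, a minimal sketch of how the pipeline in this file could be exercised outside the Gradio UI. It assumes a CUDA device, access to the IDEA-Research/grounding-dino-base and martintmv/InsectSAM checkpoints, and a hypothetical local input file insect.jpg; process_image and grounded_segmentation are the functions defined in app.py above.

    # Hypothetical driver script; assumes app.py's functions are importable and a GPU is present.
    from PIL import Image

    image = Image.open("insect.jpg").convert("RGB")       # hypothetical input file
    annotated, insects_on_yellow = process_image(image)   # two numpy arrays, as returned to Gradio

    # Lower-level call with explicit labels and threshold:
    original, detections = grounded_segmentation(
        image, labels=["insect"], threshold=0.3, polygon_refinement=True
    )
    for det in detections:
        print(det.label, f"{det.score:.2f}", det.box.xyxy)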