# InsectSAM / app.py
# Author: Martin Tomov
# Last commit: "keycv2 fix" (2131407, verified)
# Page links: raw | history | blame
# Size: 8.63 kB
import os
os.system('pip install gradio==4.29.0') # as gradio==4.29.0 doesn't work in requirements.txt
import random
from dataclasses import dataclass
from typing import Any, List, Dict, Optional, Union, Tuple
import cv2
import torch
import requests
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from transformers import AutoModelForMaskGeneration, AutoProcessor, pipeline
import gradio as gr
import spaces
@dataclass
class BoundingBox:
    """Axis-aligned rectangle described by its minimum and maximum corners."""

    xmin: int
    ymin: int
    xmax: int
    ymax: int

    @property
    def xyxy(self) -> List[float]:
        """Return the coordinates as ``[xmin, ymin, xmax, ymax]``."""
        corners = (self.xmin, self.ymin, self.xmax, self.ymax)
        return list(corners)
@dataclass
class DetectionResult:
    """A single detection: score, label, bounding box and an optional mask."""

    score: float
    label: str
    box: BoundingBox
    # Filled in later by the segmentation step; stays None until then.
    mask: Optional[np.ndarray] = None

    @classmethod
    def from_dict(cls, detection_dict: Dict) -> 'DetectionResult':
        """Build a result from a mapping with 'score', 'label' and 'box' keys.

        The 'box' entry must itself map 'xmin', 'ymin', 'xmax' and 'ymax'
        to the box coordinates. The mask is left unset.
        """
        score = detection_dict['score']
        label = detection_dict['label']
        corners = detection_dict['box']
        bounding_box = BoundingBox(
            xmin=corners['xmin'],
            ymin=corners['ymin'],
            xmax=corners['xmax'],
            ymax=corners['ymax'],
        )
        return cls(score=score, label=label, box=bounding_box)
def annotate(image: Union[Image.Image, np.ndarray], detection_results: List[DetectionResult]) -> np.ndarray:
    """Draw every detection's box, caption and mask outline onto the image.

    Args:
        image: RGB picture, either a PIL image or an array.
        detection_results: Detections to draw; a detection whose mask is
            None gets only its box and caption.

    Returns:
        The annotated picture as an RGB array. Drawing is done on the array
        returned by the RGB->BGR conversion, then converted back.
    """
    canvas = image
    if isinstance(image, Image.Image):
        canvas = np.array(image)
    canvas = cv2.cvtColor(canvas, cv2.COLOR_RGB2BGR)

    for detection in detection_results:
        box = detection.box
        # One random colour per detection, shared by box, caption and outline.
        color = np.random.randint(0, 256, size=3).tolist()

        cv2.rectangle(canvas, (box.xmin, box.ymin), (box.xmax, box.ymax), color, 2)
        caption = f'{detection.label}: {detection.score:.2f}'
        cv2.putText(
            canvas,
            caption,
            (box.xmin, box.ymin - 10),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.5,
            color,
            2,
        )

        if detection.mask is None:
            continue
        # Scale the mask to 0/255 before tracing its outer contours.
        scaled_mask = (detection.mask * 255).astype(np.uint8)
        outlines, _ = cv2.findContours(scaled_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        cv2.drawContours(canvas, outlines, -1, color, 2)

    return cv2.cvtColor(canvas, cv2.COLOR_BGR2RGB)
def plot_detections(image: Union[Image.Image, np.ndarray], detections: List[DetectionResult]) -> np.ndarray:
    """Return the image with the given detections drawn on it.

    Thin wrapper that forwards both arguments to ``annotate``.
    """
    return annotate(image, detections)
def load_image(image: Union[str, Image.Image], timeout: float = 30.0) -> Image.Image:
    """Return the input as an RGB PIL image.

    Args:
        image: An ``http://``/``https://`` URL, a local file path, or an
            already-loaded PIL image.
        timeout: Seconds to wait for the server when ``image`` is a URL.

    Returns:
        The image converted to RGB mode.

    Raises:
        requests.HTTPError: If the URL answers with an error status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    if not isinstance(image, str):
        return image.convert("RGB")

    # Only real URL schemes go over the network; a local file whose name
    # merely starts with "http" is opened from disk.
    if image.startswith(("http://", "https://")):
        with requests.get(image, stream=True, timeout=timeout) as response:
            # Fail on 4xx/5xx instead of handing an error page to PIL.
            response.raise_for_status()
            # Let urllib3 undo gzip/deflate transfer encoding on the raw stream.
            response.raw.decode_content = True
            # convert() reads the pixel data, so it must run before the
            # response is closed.
            return Image.open(response.raw).convert("RGB")

    # The context manager closes the file handle once the pixels are copied.
    with Image.open(image) as opened:
        return opened.convert("RGB")
def get_boxes(detection_results: List[DetectionResult]) -> List[List[List[float]]]:
    """Collect each detection's ``box.xyxy`` into a single-element outer list.

    Returns:
        ``[[xyxy_0, xyxy_1, ...]]`` — one inner list holding every box, in
        the order of ``detection_results``.
    """
    return [[result.box.xyxy for result in detection_results]]
def mask_to_polygon(mask: np.ndarray) -> np.ndarray:
    """Return the largest external contour of ``mask``.

    Returns:
        The contour with the greatest ``cv2.contourArea``, or an empty
        array when no contour is found.
    """
    outlines, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not outlines:
        return np.array([])
    return max(outlines, key=cv2.contourArea)
def refine_masks(masks: torch.BoolTensor, polygon_refinement: bool = False) -> List[np.ndarray]:
    """Collapse a 4-D mask tensor into one binary uint8 mask per entry.

    Args:
        masks: 4-D tensor; dimension 1 is averaged away, dimensions 2 and 3
            become the rows and columns of each returned mask.
        polygon_refinement: When True, each non-empty mask is replaced by
            the filled polygon of its largest external contour.

    Returns:
        A list of 2-D uint8 arrays with values 0 or 1, one per entry along
        dimension 0 of ``masks``.
    """
    # Move dimension 1 last and average over it.
    averaged = masks.cpu().float().permute(0, 2, 3, 1).mean(dim=-1).numpy()
    # The uint8 cast truncates fractional means to 0, so a pixel stays set
    # only when the mean reaches 1 — for boolean input that means every mask
    # along the averaged dimension agrees.
    # NOTE(review): thresholding before the cast would give the union instead;
    # confirm which is intended.
    binary = (averaged.astype(np.uint8) > 0).astype(np.uint8)

    if polygon_refinement:
        for idx, mask in enumerate(binary):
            # An empty mask has no contour; passing the resulting empty
            # polygon to fillPoly would fail, so leave the mask as it is.
            if not mask.any():
                continue
            polygon = mask_to_polygon(mask)
            if polygon.size == 0:
                continue
            binary[idx] = cv2.fillPoly(np.zeros(mask.shape, dtype=np.uint8), [polygon], 1)

    return list(binary)
@spaces.GPU
def detect(image: Image.Image, labels: List[str], threshold: float = 0.3, detector_id: Optional[str] = None) -> List[DetectionResult]:
    """Run zero-shot object detection for ``labels`` on ``image``.

    Args:
        image: Picture to search.
        labels: Candidate labels; a trailing "." is appended to any label
            that lacks one before it is passed to the detector.
        threshold: Score threshold forwarded to the detection pipeline.
        detector_id: Model id for the pipeline; defaults to
            "IDEA-Research/grounding-dino-base".

    Returns:
        One ``DetectionResult`` per pipeline result (masks unset).
    """
    detector_id = detector_id or "IDEA-Research/grounding-dino-base"
    # Fall back to the CPU so the function still works on hosts without CUDA.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    object_detector = pipeline(model=detector_id, task="zero-shot-object-detection", device=device)

    prompts = [label if label.endswith(".") else label + "." for label in labels]
    results = object_detector(image, candidate_labels=prompts, threshold=threshold)
    return [DetectionResult.from_dict(result) for result in results]
@spaces.GPU
def segment(image: Image.Image, detection_results: List[DetectionResult], polygon_refinement: bool = False, segmenter_id: Optional[str] = None) -> List[DetectionResult]:
    """Attach a segmentation mask to every detection.

    Args:
        image: Picture the detections refer to.
        detection_results: Detections whose boxes prompt the mask model.
        polygon_refinement: Forwarded to ``refine_masks``.
        segmenter_id: Model id for the mask-generation model and its
            processor; defaults to "martintmv/InsectSAM".

    Returns:
        The same ``detection_results`` list; each element's ``mask``
        attribute is set in place.
    """
    # With nothing detected there are no box prompts; the processor is not
    # expected to accept an empty box list, so skip model loading entirely.
    if not detection_results:
        return detection_results

    segmenter_id = segmenter_id or "martintmv/InsectSAM"
    # Fall back to the CPU so the function still works on hosts without CUDA.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    segmentator = AutoModelForMaskGeneration.from_pretrained(segmenter_id).to(device)
    processor = AutoProcessor.from_pretrained(segmenter_id)

    boxes = get_boxes(detection_results)
    inputs = processor(images=image, input_boxes=boxes, return_tensors="pt").to(device)
    # Inference only: avoid building an autograd graph.
    with torch.no_grad():
        outputs = segmentator(**inputs)

    # Index 0 selects the masks belonging to the single input image.
    masks = processor.post_process_masks(
        masks=outputs.pred_masks,
        original_sizes=inputs.original_sizes,
        reshaped_input_sizes=inputs.reshaped_input_sizes,
    )[0]
    masks = refine_masks(masks, polygon_refinement)

    for detection_result, mask in zip(detection_results, masks):
        detection_result.mask = mask
    return detection_results
def grounded_segmentation(
    image: Union[Image.Image, str],
    labels: List[str],
    threshold: float = 0.3,
    polygon_refinement: bool = False,
    detector_id: Optional[str] = None,
    segmenter_id: Optional[str] = None,
) -> Tuple[np.ndarray, List[DetectionResult]]:
    """Detect the labelled objects in an image, then segment each of them.

    Args:
        image: PIL image, file path or URL, resolved by ``load_image``.
        labels: Labels passed to ``detect``.
        threshold: Detection threshold passed to ``detect``.
        polygon_refinement: Passed to ``segment``.
        detector_id: Optional detector model id passed to ``detect``.
        segmenter_id: Optional segmenter model id passed to ``segment``.

    Returns:
        The loaded RGB image as an array, and the detections returned by
        ``segment``.
    """
    rgb_image = load_image(image)
    found = detect(rgb_image, labels, threshold, detector_id)
    found = segment(rgb_image, found, polygon_refinement, segmenter_id)
    return np.array(rgb_image), found
def mask_to_min_max(mask: np.ndarray) -> Tuple[int, int, int, int]:
    """Return ``(xmin, ymin, xmax, ymax)`` of the non-zero pixels of a 2-D mask.

    The maxima are the indices of the last non-zero column and row, i.e.
    they are inclusive. A mask with no non-zero pixel raises ValueError
    from the reduction over an empty array.
    """
    rows, cols = np.nonzero(mask)
    return cols.min(), rows.min(), cols.max(), rows.max()
def extract_and_paste_insect(original_image: np.ndarray, detection: DetectionResult, background: np.ndarray) -> None:
    """Copy the masked pixels of one detection from the image onto ``background``.

    Every pixel where ``detection.mask`` is non-zero is copied from
    ``original_image`` to the same position in ``background``; all other
    background pixels are left untouched. An all-zero mask is a no-op.

    Args:
        original_image: Source picture, shape ``(H, W, C)``.
        detection: Detection whose ``mask`` (shape ``(H, W)``) selects the
            pixels to copy.
        background: Destination picture with the same shape as
            ``original_image``; modified in place.
    """
    # Copying through the full-size boolean mask keeps the pixels at their
    # original location and cannot run past the image border, whatever the
    # detection box says. It also works for 0/1 masks, where a bitwise NOT
    # would yield 254/255 and therefore never clear the background.
    selected = np.asarray(detection.mask).astype(bool)
    background[selected] = original_image[selected]
def create_yellow_background_with_insects(image: np.ndarray, detections: List[DetectionResult]) -> np.ndarray:
    """Return a yellow canvas of the image's size with the masked insects pasted in.

    Args:
        image: RGB source picture; only its height and width size the canvas.
        detections: Detections to paste; those whose mask is None are skipped.

    Returns:
        A new ``(H, W, 3)`` uint8 RGB array.
    """
    height, width = image.shape[0], image.shape[1]
    # The pipeline works in RGB, where yellow is (255, 255, 0);
    # (0, 255, 255) would be yellow only in BGR order and shows as cyan here.
    yellow_background = np.full((height, width, 3), (255, 255, 0), dtype=np.uint8)

    for detection in detections:
        if detection.mask is None:
            continue
        extract_and_paste_insect(image, detection, yellow_background)
    return yellow_background
def draw_classification_boxes(image_with_insects, detections):
    """Draw each detection's box and a filled caption tag on the image.

    The drawing calls operate directly on ``image_with_insects``, and that
    same array is returned.

    Args:
        image_with_insects: Picture to draw on.
        detections: Detections providing ``label``, ``score`` and ``box``.

    Returns:
        ``image_with_insects`` after drawing.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    for detection in detections:
        box = detection.box
        # One random colour per detection for both the box and the tag.
        color = np.random.randint(0, 256, size=3).tolist()
        cv2.rectangle(image_with_insects, (box.xmin, box.ymin), (box.xmax, box.ymax), color, 2)

        caption = f"{detection.label}: {detection.score:.2f}"
        (text_width, text_height), baseline = cv2.getTextSize(caption, font, 0.5, 2)

        # Filled tag sitting directly above the box's top-left corner.
        tag_top_left = (box.xmin, box.ymin - text_height - baseline)
        tag_bottom_right = (box.xmin + text_width, box.ymin)
        cv2.rectangle(image_with_insects, tag_top_left, tag_bottom_right, color, thickness=cv2.FILLED)

        # White caption text inside the tag.
        cv2.putText(
            image_with_insects,
            caption,
            (box.xmin, box.ymin - baseline),
            font,
            0.5,
            (255, 255, 255),
            2,
        )
    return image_with_insects
def process_image(image):
    """Gradio callback: detect and segment insects, then build both output images.

    Args:
        image: The picture supplied by the interface.

    Returns:
        A pair ``(annotated, boxed)``: the input with detections drawn on
        it, and the picture built by
        ``create_yellow_background_with_insects`` with classification boxes
        drawn on a copy of it.
    """
    rgb_image, found = grounded_segmentation(
        image,
        ["insect"],
        threshold=0.3,
        polygon_refinement=True,
    )
    annotated = plot_detections(rgb_image, found)
    pasted = create_yellow_background_with_insects(np.array(rgb_image), found)
    boxed = draw_classification_boxes(pasted.copy(), found)
    return annotated, boxed
# Build the web UI: one PIL image in, two numpy images out (the annotated
# input and the pasted-insects picture), then start serving it.
demo = gr.Interface(
    fn=process_image,
    inputs=gr.Image(type="pil"),
    outputs=[gr.Image(type="numpy"), gr.Image(type="numpy")],
    title="🐞 Insect Detection and Masking",
)
demo.launch()