Martin Tomov committed on
Commit
212f0f5
1 Parent(s): b59122a
Files changed (1) hide show
  1. app.py +12 -25
app.py CHANGED
@@ -9,10 +9,8 @@ import torch
9
  import requests
10
  import numpy as np
11
  from PIL import Image
12
- import matplotlib.pyplot as plt
13
- from transformers import AutoModelForMaskGeneration, AutoProcessor, pipeline
14
  import gradio as gr
15
- import spaces
16
  import json
17
 
18
  @dataclass
@@ -54,10 +52,9 @@ def annotate(image: Union[Image.Image, np.ndarray], detection_results: List[Dete
54
  label = detection.label
55
  score = detection.score
56
  box = detection.box
57
- mask = detection.mask
58
 
59
  if include_bboxes:
60
- color = np.random.randint(0, 256, size=3).tolist()
61
  cv2.rectangle(image_cv2, (box.xmin, box.ymin), (box.xmax, box.ymax), color, 2)
62
  cv2.putText(image_cv2, f'{label}: {score:.2f}', (box.xmin, box.ymin - 10),
63
  cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
@@ -65,8 +62,7 @@ def annotate(image: Union[Image.Image, np.ndarray], detection_results: List[Dete
65
  return cv2.cvtColor(image_cv2, cv2.COLOR_BGR2RGB)
66
 
67
  def plot_detections(image: Union[Image.Image, np.ndarray], detections: List[DetectionResult], include_bboxes: bool = True) -> np.ndarray:
68
- annotated_image = annotate(image, detections, include_bboxes)
69
- return annotated_image
70
 
71
  def load_image(image: Union[str, Image.Image]) -> Image.Image:
72
  if isinstance(image, str) and image.startswith("http"):
@@ -77,19 +73,14 @@ def load_image(image: Union[str, Image.Image]) -> Image.Image:
77
  image = image.convert("RGB")
78
  return image
79
 
80
- def get_boxes(detection_results: List[DetectionResult]) -> List[List[List[float]]]:
81
- boxes = []
82
- for result in detection_results:
83
- xyxy = result.box.xyxy
84
- boxes.append(xyxy)
85
- return [boxes]
86
 
87
  def mask_to_polygon(mask: np.ndarray) -> np.ndarray:
88
  contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
89
  if len(contours) == 0:
90
  return np.array([])
91
- largest_contour = max(contours, key=cv2.contourArea)
92
- return largest_contour
93
 
94
  def refine_masks(masks: torch.BoolTensor, polygon_refinement: bool = False) -> List[np.ndarray]:
95
  masks = masks.cpu().float().permute(0, 2, 3, 1).mean(axis=-1).numpy().astype(np.uint8)
@@ -101,21 +92,19 @@ def refine_masks(masks: torch.BoolTensor, polygon_refinement: bool = False) -> L
101
  masks[idx] = cv2.fillPoly(np.zeros(shape, dtype=np.uint8), [polygon], 1)
102
  return list(masks)
103
 
104
- @spaces.GPU
105
- def detect(image: Image.Image, labels: List[str], threshold: float = 0.3, detector_id: Optional[str] = None) -> List[Dict[str, Any]]:
106
  detector_id = detector_id if detector_id else "IDEA-Research/grounding-dino-base"
107
- object_detector = pipeline(model=detector_id, task="zero-shot-object-detection", device="cuda")
108
- labels = [label if label.endswith(".") else label+"." for label in labels]
109
  results = object_detector(image, candidate_labels=labels, threshold=threshold)
110
  return [DetectionResult.from_dict(result) for result in results]
111
 
112
- @spaces.GPU
113
  def segment(image: Image.Image, detection_results: List[DetectionResult], polygon_refinement: bool = False, segmenter_id: Optional[str] = None) -> List[DetectionResult]:
114
  segmenter_id = segmenter_id if segmenter_id else "martintmv/InsectSAM"
115
- segmentator = AutoModelForMaskGeneration.from_pretrained(segmenter_id).to("cuda")
116
  processor = AutoProcessor.from_pretrained(segmenter_id)
117
  boxes = get_boxes(detection_results)
118
- inputs = processor(images=image, input_boxes=boxes, return_tensors="pt").to("cuda")
119
  outputs = segmentator(**inputs)
120
  masks = processor.post_process_masks(masks=outputs.pred_masks, original_sizes=inputs.original_sizes, reshaped_input_sizes=inputs.reshaped_input_sizes)[0]
121
  masks = refine_masks(masks, polygon_refinement)
@@ -152,9 +141,7 @@ def create_yellow_background_with_insects(image: np.ndarray, detections: List[De
152
  for detection in detections:
153
  if detection.mask is not None:
154
  extract_and_paste_insect(image, detection, yellow_background)
155
- # Convert back to RGB to match Gradio's expected input format
156
- yellow_background = cv2.cvtColor(yellow_background, cv2.COLOR_BGR2RGB)
157
- return yellow_background
158
 
159
  def run_length_encoding(mask):
160
  pixels = mask.flatten()
 
9
  import requests
10
  import numpy as np
11
  from PIL import Image
 
 
12
  import gradio as gr
13
+ from transformers import AutoModelForMaskGeneration, AutoProcessor, pipeline
14
  import json
15
 
16
  @dataclass
 
52
  label = detection.label
53
  score = detection.score
54
  box = detection.box
 
55
 
56
  if include_bboxes:
57
+ color = [int(c) for c in np.random.randint(0, 256, size=3)]
58
  cv2.rectangle(image_cv2, (box.xmin, box.ymin), (box.xmax, box.ymax), color, 2)
59
  cv2.putText(image_cv2, f'{label}: {score:.2f}', (box.xmin, box.ymin - 10),
60
  cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
 
62
  return cv2.cvtColor(image_cv2, cv2.COLOR_BGR2RGB)
63
 
64
  def plot_detections(image: Union[Image.Image, np.ndarray], detections: List[DetectionResult], include_bboxes: bool = True) -> np.ndarray:
65
+ return annotate(image, detections, include_bboxes)
 
66
 
67
  def load_image(image: Union[str, Image.Image]) -> Image.Image:
68
  if isinstance(image, str) and image.startswith("http"):
 
73
  image = image.convert("RGB")
74
  return image
75
 
76
def get_boxes(detection_results: List[DetectionResult]) -> List[List[List[float]]]:
    """Collect the detections' boxes as a single-image batch.

    Args:
        detection_results: Detections whose ``box.xyxy`` value is read.

    Returns:
        A one-element list wrapping the list of ``box.xyxy`` values, i.e.
        ``[[xyxy_0, xyxy_1, ...]]``. ``segment`` passes this value straight to
        the processor as ``input_boxes``, which expects boxes nested per
        image; a flat list of boxes is rejected there.

    Note:
        Assumes each ``box.xyxy`` is a flat ``[xmin, ymin, xmax, ymax]``
        list -- confirm against the bounding-box class.
    """
    # The outer list is the batch dimension: one image per call.
    return [[result.box.xyxy for result in detection_results]]
 
 
 
 
78
 
79
  def mask_to_polygon(mask: np.ndarray) -> np.ndarray:
80
  contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
81
  if len(contours) == 0:
82
  return np.array([])
83
+ return max(contours, key=cv2.contourArea)
 
84
 
85
  def refine_masks(masks: torch.BoolTensor, polygon_refinement: bool = False) -> List[np.ndarray]:
86
  masks = masks.cpu().float().permute(0, 2, 3, 1).mean(axis=-1).numpy().astype(np.uint8)
 
92
  masks[idx] = cv2.fillPoly(np.zeros(shape, dtype=np.uint8), [polygon], 1)
93
  return list(masks)
94
 
95
def detect(image: Image.Image, labels: List[str], threshold: float = 0.3, detector_id: Optional[str] = None) -> List[DetectionResult]:
    """Run zero-shot object detection on ``image`` for the given text labels.

    Args:
        image: Image handed unchanged to the detection pipeline.
        labels: Candidate label strings. Any label that does not already end
            with "." gets one appended before it is passed to the pipeline.
        threshold: Forwarded to the pipeline as ``threshold``.
        detector_id: Model identifier for the pipeline; a falsy value selects
            "IDEA-Research/grounding-dino-base".

    Returns:
        One ``DetectionResult`` per pipeline result, built with
        ``DetectionResult.from_dict``.
    """
    model_name = detector_id or "IDEA-Research/grounding-dino-base"
    # NOTE(review): the pipeline is rebuilt on every call -- consider reusing
    # it if repeated model loading turns out to be a bottleneck.
    detector = pipeline(model=model_name, task="zero-shot-object-detection")

    # Presumably the detector's text prompt wants period-terminated labels --
    # confirm against the model card.
    prompts = []
    for text in labels:
        prompts.append(text if text.endswith(".") else text + ".")

    raw_results = detector(image, candidate_labels=prompts, threshold=threshold)
    return [DetectionResult.from_dict(item) for item in raw_results]
101
 
 
102
  def segment(image: Image.Image, detection_results: List[DetectionResult], polygon_refinement: bool = False, segmenter_id: Optional[str] = None) -> List[DetectionResult]:
103
  segmenter_id = segmenter_id if segmenter_id else "martintmv/InsectSAM"
104
+ segmentator = AutoModelForMaskGeneration.from_pretrained(segmenter_id)
105
  processor = AutoProcessor.from_pretrained(segmenter_id)
106
  boxes = get_boxes(detection_results)
107
+ inputs = processor(images=image, input_boxes=boxes, return_tensors="pt")
108
  outputs = segmentator(**inputs)
109
  masks = processor.post_process_masks(masks=outputs.pred_masks, original_sizes=inputs.original_sizes, reshaped_input_sizes=inputs.reshaped_input_sizes)[0]
110
  masks = refine_masks(masks, polygon_refinement)
 
141
  for detection in detections:
142
  if detection.mask is not None:
143
  extract_and_paste_insect(image, detection, yellow_background)
144
+ return cv2.cvtColor(yellow_background, cv2.COLOR_BGR2RGB)
 
 
145
 
146
  def run_length_encoding(mask):
147
  pixels = mask.flatten()