banao-tech committed on
Commit
fd8f826
·
verified ·
1 Parent(s): ff809c7

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +5 -12
utils.py CHANGED
@@ -361,26 +361,18 @@ def annotate(image_source: np.ndarray, boxes: torch.Tensor, logits: torch.Tensor
361
  text_scale: float, text_padding=5, text_thickness=2, thickness=3) -> Tuple[np.ndarray, dict]:
362
  """
363
  Annotates an image with bounding boxes and labels.
364
-
365
- Args:
366
- image_source: Source image as a NumPy array.
367
- boxes: Bounding boxes in cxcywh format (normalized).
368
- logits: Confidence scores for each bounding box.
369
- phrases: List of labels.
370
- text_scale, text_padding, text_thickness, thickness: Annotation parameters.
371
-
372
- Returns:
373
- Annotated image and a dictionary of label coordinates.
374
  """
 
 
 
375
  h, w, _ = image_source.shape
376
  boxes = boxes * torch.Tensor([w, h, w, h])
377
  xyxy = box_convert(boxes=boxes, in_fmt="cxcywh", out_fmt="xyxy").numpy()
378
  xywh = box_convert(boxes=boxes, in_fmt="cxcywh", out_fmt="xywh").numpy()
379
  detections = sv.Detections(xyxy=xyxy)
380
 
381
- labels = [f"{phrase}" for phrase in range(boxes.shape[0])]
382
 
383
- # Import the custom box annotator from your project structure.
384
  from util.box_annotator import BoxAnnotator
385
  box_annotator = BoxAnnotator(text_scale=text_scale, text_padding=text_padding,
386
  text_thickness=text_thickness, thickness=thickness)
@@ -391,6 +383,7 @@ def annotate(image_source: np.ndarray, boxes: torch.Tensor, logits: torch.Tensor
391
  return annotated_frame, label_coordinates
392
 
393
 
 
394
  def predict(model, image, caption, box_threshold, text_threshold):
395
  """
396
  Uses a Hugging Face model to perform grounded object detection.
 
361
  text_scale: float, text_padding=5, text_thickness=2, thickness=3) -> Tuple[np.ndarray, dict]:
362
  """
363
  Annotates an image with bounding boxes and labels.
 
 
 
 
 
 
 
 
 
 
364
  """
365
+ # Validate phrases input
366
+ phrases = [str(phrase) if not isinstance(phrase, str) else phrase for phrase in phrases]
367
+
368
  h, w, _ = image_source.shape
369
  boxes = boxes * torch.Tensor([w, h, w, h])
370
  xyxy = box_convert(boxes=boxes, in_fmt="cxcywh", out_fmt="xyxy").numpy()
371
  xywh = box_convert(boxes=boxes, in_fmt="cxcywh", out_fmt="xywh").numpy()
372
  detections = sv.Detections(xyxy=xyxy)
373
 
374
+ labels = [f"{phrase}" for phrase in phrases]
375
 
 
376
  from util.box_annotator import BoxAnnotator
377
  box_annotator = BoxAnnotator(text_scale=text_scale, text_padding=text_padding,
378
  text_thickness=text_thickness, thickness=thickness)
 
383
  return annotated_frame, label_coordinates
384
 
385
 
386
+
387
  def predict(model, image, caption, box_threshold, text_threshold):
388
  """
389
  Uses a Hugging Face model to perform grounded object detection.