banao-tech committed on
Commit
fd8f826
·
verified ·
1 Parent(s): ff809c7

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +5 -12
utils.py CHANGED
@@ -361,26 +361,18 @@ def annotate(image_source: np.ndarray, boxes: torch.Tensor, logits: torch.Tensor
361
  text_scale: float, text_padding=5, text_thickness=2, thickness=3) -> Tuple[np.ndarray, dict]:
362
  """
363
  Annotates an image with bounding boxes and labels.
364
-
365
- Args:
366
- image_source: Source image as a NumPy array.
367
- boxes: Bounding boxes in cxcywh format (normalized).
368
- logits: Confidence scores for each bounding box.
369
- phrases: List of labels.
370
- text_scale, text_padding, text_thickness, thickness: Annotation parameters.
371
-
372
- Returns:
373
- Annotated image and a dictionary of label coordinates.
374
  """
 
 
 
375
  h, w, _ = image_source.shape
376
  boxes = boxes * torch.Tensor([w, h, w, h])
377
  xyxy = box_convert(boxes=boxes, in_fmt="cxcywh", out_fmt="xyxy").numpy()
378
  xywh = box_convert(boxes=boxes, in_fmt="cxcywh", out_fmt="xywh").numpy()
379
  detections = sv.Detections(xyxy=xyxy)
380
 
381
- labels = [f"{phrase}" for phrase in range(boxes.shape[0])]
382
 
383
- # Import the custom box annotator from your project structure.
384
  from util.box_annotator import BoxAnnotator
385
  box_annotator = BoxAnnotator(text_scale=text_scale, text_padding=text_padding,
386
  text_thickness=text_thickness, thickness=thickness)
@@ -391,6 +383,7 @@ def annotate(image_source: np.ndarray, boxes: torch.Tensor, logits: torch.Tensor
391
  return annotated_frame, label_coordinates
392
 
393
 
 
394
  def predict(model, image, caption, box_threshold, text_threshold):
395
  """
396
  Uses a Hugging Face model to perform grounded object detection.
 
361
  text_scale: float, text_padding=5, text_thickness=2, thickness=3) -> Tuple[np.ndarray, dict]:
362
  """
363
  Annotates an image with bounding boxes and labels.
 
 
 
 
 
 
 
 
 
 
364
  """
365
+ # Validate phrases input
366
+ phrases = [str(phrase) if not isinstance(phrase, str) else phrase for phrase in phrases]
367
+
368
  h, w, _ = image_source.shape
369
  boxes = boxes * torch.Tensor([w, h, w, h])
370
  xyxy = box_convert(boxes=boxes, in_fmt="cxcywh", out_fmt="xyxy").numpy()
371
  xywh = box_convert(boxes=boxes, in_fmt="cxcywh", out_fmt="xywh").numpy()
372
  detections = sv.Detections(xyxy=xyxy)
373
 
374
+ labels = [f"{phrase}" for phrase in phrases]
375
 
 
376
  from util.box_annotator import BoxAnnotator
377
  box_annotator = BoxAnnotator(text_scale=text_scale, text_padding=text_padding,
378
  text_thickness=text_thickness, thickness=thickness)
 
383
  return annotated_frame, label_coordinates
384
 
385
 
386
+
387
  def predict(model, image, caption, box_threshold, text_threshold):
388
  """
389
  Uses a Hugging Face model to perform grounded object detection.