Spaces:
Sleeping
Sleeping
Update utils.py
Browse files
utils.py
CHANGED
@@ -361,26 +361,18 @@ def annotate(image_source: np.ndarray, boxes: torch.Tensor, logits: torch.Tensor
|
|
361 |
text_scale: float, text_padding=5, text_thickness=2, thickness=3) -> Tuple[np.ndarray, dict]:
|
362 |
"""
|
363 |
Annotates an image with bounding boxes and labels.
|
364 |
-
|
365 |
-
Args:
|
366 |
-
image_source: Source image as a NumPy array.
|
367 |
-
boxes: Bounding boxes in cxcywh format (normalized).
|
368 |
-
logits: Confidence scores for each bounding box.
|
369 |
-
phrases: List of labels.
|
370 |
-
text_scale, text_padding, text_thickness, thickness: Annotation parameters.
|
371 |
-
|
372 |
-
Returns:
|
373 |
-
Annotated image and a dictionary of label coordinates.
|
374 |
"""
|
|
|
|
|
|
|
375 |
h, w, _ = image_source.shape
|
376 |
boxes = boxes * torch.Tensor([w, h, w, h])
|
377 |
xyxy = box_convert(boxes=boxes, in_fmt="cxcywh", out_fmt="xyxy").numpy()
|
378 |
xywh = box_convert(boxes=boxes, in_fmt="cxcywh", out_fmt="xywh").numpy()
|
379 |
detections = sv.Detections(xyxy=xyxy)
|
380 |
|
381 |
-
labels = [f"{phrase}" for phrase in
|
382 |
|
383 |
-
# Import the custom box annotator from your project structure.
|
384 |
from util.box_annotator import BoxAnnotator
|
385 |
box_annotator = BoxAnnotator(text_scale=text_scale, text_padding=text_padding,
|
386 |
text_thickness=text_thickness, thickness=thickness)
|
@@ -391,6 +383,7 @@ def annotate(image_source: np.ndarray, boxes: torch.Tensor, logits: torch.Tensor
|
|
391 |
return annotated_frame, label_coordinates
|
392 |
|
393 |
|
|
|
394 |
def predict(model, image, caption, box_threshold, text_threshold):
|
395 |
"""
|
396 |
Uses a Hugging Face model to perform grounded object detection.
|
|
|
361 |
text_scale: float, text_padding=5, text_thickness=2, thickness=3) -> Tuple[np.ndarray, dict]:
|
362 |
"""
|
363 |
Annotates an image with bounding boxes and labels.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
364 |
"""
|
365 |
+
# Normalize phrases: coerce any non-string entries to str (nothing is rejected)
|
366 |
+
phrases = [str(phrase) if not isinstance(phrase, str) else phrase for phrase in phrases]
|
367 |
+
|
368 |
h, w, _ = image_source.shape
|
369 |
boxes = boxes * torch.Tensor([w, h, w, h])
|
370 |
xyxy = box_convert(boxes=boxes, in_fmt="cxcywh", out_fmt="xyxy").numpy()
|
371 |
xywh = box_convert(boxes=boxes, in_fmt="cxcywh", out_fmt="xywh").numpy()
|
372 |
detections = sv.Detections(xyxy=xyxy)
|
373 |
|
374 |
+
labels = [f"{phrase}" for phrase in phrases]
|
375 |
|
|
|
376 |
from util.box_annotator import BoxAnnotator
|
377 |
box_annotator = BoxAnnotator(text_scale=text_scale, text_padding=text_padding,
|
378 |
text_thickness=text_thickness, thickness=thickness)
|
|
|
383 |
return annotated_frame, label_coordinates
|
384 |
|
385 |
|
386 |
+
|
387 |
def predict(model, image, caption, box_threshold, text_threshold):
|
388 |
"""
|
389 |
Uses a Hugging Face model to perform grounded object detection.
|