Luigi committed
Commit 634d4ff
Parent: 98d66d4

Return and show bounding box confidence

Files changed (3):
  1. rtmo_demo.py +2 -2
  2. rtmo_demo_batch.py +6 -4
  3. rtmo_gpu.py +44 -17
rtmo_demo.py CHANGED
@@ -36,7 +36,7 @@ if __name__ == "__main__":
         if not success:
             break
 
-        frame_out, bboxes, keypoints, scores = body(frame)
+        frame_out, bboxes, bboxes_scores, keypoints, scores = body(frame)
 
         if keypoints is not None:
             if frame_idx % args.batch_size == 0 and frame_idx:
@@ -56,7 +56,7 @@ if __name__ == "__main__":
                                       scores,
                                       kpt_thr=0.3,
                                       line_width=2)
-            img_show = draw_bbox(img_show, bboxes)
+            img_show = draw_bbox(img_show, bboxes, bboxes_scores)
            img_show = resize_to_fit_screen(img_show, 720, 480)
            cv2.putText(img_show, f'{fps:.1f}', (10, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 255, 0), 1, cv2.LINE_AA)
            cv2.imshow(f'{model}', img_show)
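
With this commit the per-frame call yields the box confidences as an extra return value. A minimal consuming loop, sketched under the assumption that the estimator is constructed as in this demo; the model path, video path, and 0.5 threshold below are illustrative, not part of the commit:

import cv2
from rtmo_gpu import RTMO_GPU_Batch, draw_bbox

body = RTMO_GPU_Batch(model='rtmo.onnx')   # hypothetical model path
cap = cv2.VideoCapture('input.mp4')        # hypothetical video path
while True:
    success, frame = cap.read()
    if not success:
        break
    frame_out, bboxes, bboxes_scores, keypoints, scores = body(frame)
    if frame_out is None:  # batched estimator: results lag until a batch fills
        continue
    keep = bboxes_scores.ravel() > 0.5     # drop low-confidence detections
    img = draw_bbox(frame_out.copy(), bboxes[keep], bboxes_scores.ravel()[keep])
    cv2.imshow('demo', img)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cap.release()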
rtmo_demo_batch.py CHANGED
@@ -24,7 +24,7 @@ def process_video(video_path, body_estimator, batch_size=4):
         # Process the batch when it's full
         if len(batch_frames) == batch_size:
             s = time.time()
-            batch_bboxes, batch_keypoints, batch_scores = body_estimator.__batch_call__(batch_frames)
+            batch_bboxes, batch_bboxes_scores, batch_keypoints, batch_scores = body_estimator.__batch_call__(batch_frames)
             det_time = time.time() - s
             fps = round(batch_size / det_time, 1)
             print(f'Batch det: {fps} FPS')
@@ -33,9 +33,10 @@ def process_video(video_path, body_estimator, batch_size=4):
                 scores = batch_scores[i]
                 frame = batch_frames[i]
                 bboxes = batch_bboxes[i]
+                bboxes_scores = batch_bboxes_scores[i]
                 img_show = frame.copy()
                 img_show = draw_skeleton(img_show, keypoints, scores, kpt_thr=0.3, line_width=2)
-                img_show = draw_bbox(img_show, bboxes)
+                img_show = draw_bbox(img_show, bboxes, bboxes_scores)
                 img_show = resize_to_fit_screen(img_show, 720, 480)
                 cv2.putText(img_show, f'{fps:.1f}', (10, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 255, 0), 1, cv2.LINE_AA)
                 cv2.imshow(f'{video_path}', img_show)
@@ -54,14 +55,15 @@ def process_video(video_path, body_estimator, batch_size=4):
 
         # Option 2: Duplicate the last frame
         batch_frames.append(batch_frames[-1])
-        batch_bboxes, batch_keypoints, batch_scores = body_estimator.__batch_call__(batch_frames)
+        batch_bboxes, batch_bboxes_scores, batch_keypoints, batch_scores = body_estimator.__batch_call__(batch_frames)
         for i, keypoints in enumerate(batch_keypoints):
             scores = batch_scores[i]
             frame = batch_frames[i]
             bboxes = batch_bboxes[i]
+            bboxes_scores = batch_bboxes_scores[i]
             img_show = frame.copy()
             img_show = draw_skeleton(img_show, keypoints, scores, kpt_thr=0.3, line_width=2)
-            img_show = draw_bbox(img_show, bboxes)
+            img_show = draw_bbox(img_show, bboxes, bboxes_scores)
             img_show = resize_to_fit_screen(img_show, 720, 480)
             cv2.imshow(f'{video_path}', img_show)
             #cv2.waitKey(10)
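
`__batch_call__` now returns four parallel lists, one entry per frame in the batch. A short sketch of walking the results, assuming `body_estimator` is the same `RTMO_GPU_Batch` instance used above; the print format is illustrative:

batch_bboxes, batch_bboxes_scores, batch_keypoints, batch_scores = \
    body_estimator.__batch_call__(batch_frames)
# Entry i of every batch_* list belongs to batch_frames[i].
for i in range(len(batch_frames)):
    for (x1, y1, x2, y2), conf in zip(batch_bboxes[i], batch_bboxes_scores[i]):
        print(f'frame {i}: bbox ({x1:.0f}, {y1:.0f})-({x2:.0f}, {y2:.0f}) conf {conf:.2f}')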
rtmo_gpu.py CHANGED
@@ -207,12 +207,32 @@ def draw_mmpose(img,
 
     return img
 
-def draw_bbox(img, bboxes, color=(0, 255, 0)):
-    for bbox in bboxes:
+def draw_bbox(img, bboxes, bboxes_scores=None, color=None):
+    for i, bbox in enumerate(bboxes):
+        # Determine the color based on the score if no color is given
+        if color is None and bboxes_scores is not None:
+            # Scale the score to a color range (green to red)
+            score = bboxes_scores[i]
+            green = int((1 - score) * 255)
+            red = int(score * 255)
+            box_color = (0, green, red)
+        else:
+            box_color = color if color is not None else (0, 255, 0)
+
+        # Draw the bounding box
         img = cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
-                            (int(bbox[2]), int(bbox[3])), color, 2)
+                            (int(bbox[2]), int(bbox[3])), box_color, 1)
+
+        # Display the score at the top-right corner of the bounding box
+        if bboxes_scores is not None:
+            score_text = f'{bboxes_scores[i]:.2f}'
+            text_size, _ = cv2.getTextSize(score_text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
+            text_x = int(bbox[2]) - text_size[0]
+            text_y = int(bbox[1]) + text_size[1]
+            img = cv2.putText(img, score_text, (text_x, text_y),
+                              cv2.FONT_HERSHEY_SIMPLEX, 0.5, box_color, 1, cv2.LINE_AA)
     return img
-
+
 # with simplification to use onnxruntime only
 def draw_skeleton(img,
                   keypoints,
@@ -333,6 +353,8 @@ class RTMO_GPU(object):
            tuple:
            - final_boxes (np.ndarray): Final bounding boxes.
            - final_scores (np.ndarray): Final scores.
+           - keypoints (np.ndarray): Final keypoints.
+           - scores (np.ndarray): Final keypoint scores.
         """
 
         if not self.is_yolo_nas_pose:
@@ -346,6 +368,7 @@ class RTMO_GPU(object):
             isscore = final_scores > 0.3
             isbbox = [i for i in isscore]
             final_boxes = final_boxes[isbbox]
+            final_boxes_scores = final_scores[isbbox]
 
             # decode pose outputs
             keypoints, scores = pose_outputs[0, :, :, :2], pose_outputs[0, :, :, 2]
@@ -359,14 +382,15 @@ class RTMO_GPU(object):
             if flat_predictions.shape[0] > 0: # at least one person found
                 mask = flat_predictions[:, 0] == 0
                 final_boxes = flat_predictions[mask, 1:5]
+                final_boxes_scores = flat_predictions[mask, 5]
                 pred_joints = flat_predictions[mask, 6:].reshape((len(final_boxes), -1, 3))
                 keypoints, scores = pred_joints[:, :, :2], pred_joints[:, :, -1]
                 keypoints = keypoints / ratio
                 final_boxes = final_boxes / ratio
             else: # no detection
-                final_boxes, keypoints, scores = np.zeros((0, 4)), np.zeros((0, 17, 2)), np.zeros((0, 17))
+                final_boxes, final_boxes_scores, keypoints, scores = np.zeros((0, 4)), np.zeros((0, 1)), np.zeros((0, 17, 2)), np.zeros((0, 17))
 
-        return final_boxes, keypoints, scores
+        return final_boxes, final_boxes_scores, keypoints, scores
 
     def inference(self, img: np.ndarray):
         """Inference model.
@@ -425,9 +449,9 @@ class RTMO_GPU(object):
 
         outputs = self.inference(image)
 
-        bboxes, keypoints, scores = self.postprocess(outputs, ratio)
+        bboxes, bboxes_scores, keypoints, scores = self.postprocess(outputs, ratio)
 
-        return bboxes, keypoints, scores
+        return bboxes, bboxes_scores, keypoints, scores
 
     def __init__(self,
                  model: str = None,
@@ -569,22 +593,24 @@ class RTMO_GPU_Batch(RTMO_GPU):
         batch_keypoints = []
         batch_scores = []
         batch_bboxes = []
+        batch_bboxes_scores = []
 
         b_dets, b_keypoints = outputs
         for i, ratio in enumerate(ratios):
             output = [np.expand_dims(b_dets[i], axis=0), np.expand_dims(b_keypoints[i], axis=0)]
-            bboxes, keypoints, scores = super().postprocess(output, ratio)
+            bboxes, bboxes_scores, keypoints, scores = super().postprocess(output, ratio)
             batch_keypoints.append(keypoints)
             batch_scores.append(scores)
             batch_bboxes.append(bboxes)
+            batch_bboxes_scores.append(bboxes_scores)
 
-        return batch_bboxes, batch_keypoints, batch_scores
+        return batch_bboxes, batch_bboxes_scores, batch_keypoints, batch_scores
 
     def __batch_call__(self, images: List[np.ndarray]):
         batch_img, ratios = self.preprocess_batch(images)
         outputs = self.inference(batch_img)
-        bboxes, keypoints, scores = self.postprocess_batch(outputs, ratios)
-        return bboxes, keypoints, scores
+        bboxes, bboxes_scores, keypoints, scores = self.postprocess_batch(outputs, ratios)
+        return bboxes, bboxes_scores, keypoints, scores
 
     def __call__(self, image: np.array, camera_id = 0):
 
@@ -600,18 +626,19 @@ class RTMO_GPU_Batch(RTMO_GPU):
         in_queue.put(image)
 
         if len(self.buffers[camera_id]) == self.batch_size:
-            b_bboxes, b_keypoints, b_scores = self.__batch_call__(self.buffers[camera_id])
+            b_bboxes, b_bboxes_scores, b_keypoints, b_scores = self.__batch_call__(self.buffers[camera_id])
             for i, (keypoints, scores) in enumerate(zip(b_keypoints, b_scores)):
                 bboxes = b_bboxes[i]
-                out_queue.put((bboxes, keypoints, scores))
+                bboxes_scores = b_bboxes_scores[i]
+                out_queue.put((bboxes, bboxes_scores, keypoints, scores))
             self.buffers[camera_id] = []
 
-        frame, bboxes, keypoints, scores = None, None, None, None
+        frame, bboxes, bboxes_scores, keypoints, scores = None, None, None, None, None
         if not out_queue.empty():
-            bboxes, keypoints, scores = out_queue.get()
+            bboxes, bboxes_scores, keypoints, scores = out_queue.get()
             frame = in_queue.get()
 
-        return frame, bboxes, keypoints, scores
+        return frame, bboxes, bboxes_scores, keypoints, scores
 
 
     def __init__(self,
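
The rewritten `draw_bbox` ramps the box color with confidence: in OpenCV's BGR order, a score of 0.0 draws pure green and 1.0 pure red, and the score is printed at the box's top-right corner. A standalone sanity check of just the color ramp; the helper name `score_to_bgr` is ours, not part of the module:

def score_to_bgr(score: float) -> tuple:
    # Same mapping as draw_bbox: low confidence -> green, high -> red (BGR)
    green = int((1 - score) * 255)
    red = int(score * 255)
    return (0, green, red)

for s in (0.0, 0.25, 0.5, 0.75, 1.0):
    print(f'score {s:.2f} -> BGR {score_to_bgr(s)}')  # 0.0 -> (0, 255, 0), 1.0 -> (0, 0, 255)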