Luigi committed
Commit: 0bf1eb7
Parent(s): 846e714

Show bounding box on screen too

Files changed:
- rtmo_demo.py (+3 -2)
- rtmo_demo_batch.py (+7 -3)
- rtmo_gpu.py (+27 -17)
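In short, the commit threads person bounding boxes through the whole pipeline: RTMO_GPU.postprocess now returns them alongside keypoints and scores, and a new draw_bbox helper renders them. A minimal sketch of the per-frame usage after this change (the model path, video path, and window name are placeholders, not taken from the demos):

import cv2
from rtmo_gpu import RTMO_GPU_Batch, draw_skeleton, draw_bbox, resize_to_fit_screen

body = RTMO_GPU_Batch(model='rtmo.onnx')   # placeholder model path
cap = cv2.VideoCapture('video.mp4')        # placeholder video path

while True:
    success, frame = cap.read()
    if not success:
        break
    # __call__ now returns bboxes as a second element (new in this commit)
    frame_out, bboxes, keypoints, scores = body(frame)
    if keypoints is not None:
        img_show = frame_out.copy()
        img_show = draw_skeleton(img_show, keypoints, scores, kpt_thr=0.3, line_width=2)
        img_show = draw_bbox(img_show, bboxes)
        cv2.imshow('rtmo', resize_to_fit_screen(img_show, 720, 480))
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

cap.release()
cv2.destroyAllWindows()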
rtmo_demo.py CHANGED

@@ -5,7 +5,7 @@ import cv2
 from pathlib import Path
 import argparse
 import os
-from rtmo_gpu import RTMO_GPU_Batch, draw_skeleton, resize_to_fit_screen
+from rtmo_gpu import RTMO_GPU_Batch, draw_skeleton, resize_to_fit_screen, draw_bbox
 
 if __name__ == "__main__":
 
@@ -36,7 +36,7 @@ if __name__ == "__main__":
         if not success:
             break
 
-        frame_out, keypoints, scores = body(frame)
+        frame_out, bboxes, keypoints, scores = body(frame)
 
         if keypoints is not None:
             if frame_idx % args.batch_size == 0 and frame_idx:
@@ -56,6 +56,7 @@ if __name__ == "__main__":
                                      scores,
                                      kpt_thr=0.3,
                                      line_width=2)
+            img_show = draw_bbox(img_show, bboxes)
             img_show = resize_to_fit_screen(img_show, 720, 480)
             cv2.putText(img_show, f'{fps:.1f}', (10, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 255, 0), 1, cv2.LINE_AA)
             cv2.imshow(f'{model}', img_show)
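Note that RTMO_GPU_Batch.__call__ buffers frames and only begins emitting results once a full batch has gone through inference, so the first few calls return None for every field, now including bboxes. The demo's `if keypoints is not None:` guard covers this; a bare-bones version of the pattern (a sketch of the loop body, not code from the demo):

# inside the capture loop
frame_out, bboxes, keypoints, scores = body(frame)
if keypoints is None:
    continue  # batch not full yet; nothing has come off the output queue
img_show = draw_bbox(frame_out.copy(), bboxes)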
rtmo_demo_batch.py CHANGED

@@ -4,7 +4,7 @@ import time
 import cv2
 from pathlib import Path
 import argparse
-from rtmo_gpu import RTMO_GPU_Batch, draw_skeleton, resize_to_fit_screen
+from rtmo_gpu import RTMO_GPU_Batch, draw_skeleton, resize_to_fit_screen, draw_bbox  # Ensure to import RTMO_GPU_Batch
 
 def process_video(video_path, body_estimator, batch_size=4):
     cap = cv2.VideoCapture(video_path)
@@ -24,7 +24,7 @@ def process_video(video_path, body_estimator, batch_size=4):
         # Process the batch when it's full
         if len(batch_frames) == batch_size:
             s = time.time()
-            batch_keypoints, batch_scores = body_estimator.__batch_call__(batch_frames)
+            batch_bboxes, batch_keypoints, batch_scores = body_estimator.__batch_call__(batch_frames)
             det_time = time.time() - s
             fps = round(batch_size / det_time, 1)
             print(f'Batch det: {fps} FPS')
@@ -32,8 +32,10 @@ def process_video(video_path, body_estimator, batch_size=4):
             for i, keypoints in enumerate(batch_keypoints):
                 scores = batch_scores[i]
                 frame = batch_frames[i]
+                bboxes = batch_bboxes[i]
                 img_show = frame.copy()
                 img_show = draw_skeleton(img_show, keypoints, scores, kpt_thr=0.3, line_width=2)
+                img_show = draw_bbox(img_show, bboxes)
                 img_show = resize_to_fit_screen(img_show, 720, 480)
                 cv2.putText(img_show, f'{fps:.1f}', (10, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 255, 0), 1, cv2.LINE_AA)
                 cv2.imshow(f'{video_path}', img_show)
@@ -52,12 +54,14 @@ def process_video(video_path, body_estimator, batch_size=4):
 
         # Option 2: Duplicate the last frame
         batch_frames.append(batch_frames[-1])
-        batch_keypoints, batch_scores = body_estimator.__batch_call__(batch_frames)
+        batch_bboxes, batch_keypoints, batch_scores = body_estimator.__batch_call__(batch_frames)
         for i, keypoints in enumerate(batch_keypoints):
             scores = batch_scores[i]
             frame = batch_frames[i]
+            bboxes = batch_bboxes[i]
             img_show = frame.copy()
             img_show = draw_skeleton(img_show, keypoints, scores, kpt_thr=0.3, line_width=2)
+            img_show = draw_bbox(img_show, bboxes)
             img_show = resize_to_fit_screen(img_show, 720, 480)
             cv2.imshow(f'{video_path}', img_show)
             #cv2.waitKey(10)
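For the explicit batched path, __batch_call__ now returns the per-image bbox lists first. A self-contained helper that mirrors the updated display loop (the name show_batch is illustrative, not from the repo):

from rtmo_gpu import draw_skeleton, draw_bbox, resize_to_fit_screen

def show_batch(body_estimator, batch_frames):
    # New return order in this commit: bboxes, then keypoints, then scores
    batch_bboxes, batch_keypoints, batch_scores = body_estimator.__batch_call__(batch_frames)
    for frame, bboxes, keypoints, scores in zip(
            batch_frames, batch_bboxes, batch_keypoints, batch_scores):
        img_show = frame.copy()
        img_show = draw_skeleton(img_show, keypoints, scores, kpt_thr=0.3, line_width=2)
        img_show = draw_bbox(img_show, bboxes)  # boxes are (x1, y1, x2, y2) in pixels
        yield resize_to_fit_screen(img_show, 720, 480)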
rtmo_gpu.py CHANGED

@@ -207,6 +207,12 @@ def draw_mmpose(img,
 
     return img
 
+def draw_bbox(img, bboxes, color=(0, 255, 0)):
+    for bbox in bboxes:
+        img = cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
+                            (int(bbox[2]), int(bbox[3])), color, 2)
+    return img
+
 # with simplification to use onnxruntime only
 def draw_skeleton(img,
                   keypoints,
@@ -339,7 +345,7 @@ class RTMO_GPU(object):
         final_boxes /= ratio
         isscore = final_scores > 0.3
         isbbox = [i for i in isscore]
-
+        final_boxes = final_boxes[isbbox]
 
         # decode pose outputs
         keypoints, scores = pose_outputs[0, :, :, :2], pose_outputs[0, :, :, 2]
@@ -352,14 +358,15 @@ class RTMO_GPU(object):
         flat_predictions = outputs[0]
         if flat_predictions.shape[0] > 0: # at least one person found
             mask = flat_predictions[:, 0] == 0
-
-            pred_joints = flat_predictions[mask, 6:].reshape((len(
+            final_boxes = flat_predictions[mask, 1:5]
+            pred_joints = flat_predictions[mask, 6:].reshape((len(final_boxes), -1, 3))
             keypoints, scores = pred_joints[:,:,:2], pred_joints[:,:,-1]
             keypoints = keypoints / ratio
+            final_boxes = final_boxes / ratio
         else: # no detection
-            keypoints, scores = np.zeros((0, 17, 2)), np.zeros((0, 17))
+            final_boxes, keypoints, scores = np.zeros((0, 4)), np.zeros((0, 17, 2)), np.zeros((0, 17))
 
-        return keypoints, scores
+        return final_boxes, keypoints, scores
 
     def inference(self, img: np.ndarray):
         """Inference model.
@@ -418,9 +425,9 @@ class RTMO_GPU(object):
 
         outputs = self.inference(image)
 
-        keypoints, scores = self.postprocess(outputs, ratio)
+        bboxes, keypoints, scores = self.postprocess(outputs, ratio)
 
-        return keypoints, scores
+        return bboxes, keypoints, scores
 
     def __init__(self,
                  model: str = None,
@@ -561,38 +568,41 @@ class RTMO_GPU_Batch(RTMO_GPU):
         """
         batch_keypoints = []
         batch_scores = []
+        batch_bboxes = []
 
         b_dets, b_keypoints = outputs
         for i, ratio in enumerate(ratios):
             output = [np.expand_dims(b_dets[i], axis=0), np.expand_dims(b_keypoints[i], axis=0)]
-            keypoints, scores = super().postprocess(output, ratio)
+            bboxes, keypoints, scores = super().postprocess(output, ratio)
             batch_keypoints.append(keypoints)
             batch_scores.append(scores)
+            batch_bboxes.append(bboxes)
 
-        return batch_keypoints, batch_scores
+        return batch_bboxes, batch_keypoints, batch_scores
 
     def __batch_call__(self, images: List[np.ndarray]):
         batch_img, ratios = self.preprocess_batch(images)
         outputs = self.inference(batch_img)
-        keypoints, scores = self.postprocess_batch(outputs, ratios)
-        return keypoints, scores
+        bboxes, keypoints, scores = self.postprocess_batch(outputs, ratios)
+        return bboxes, keypoints, scores
 
     def __call__(self, image: np.array):
         self.buffer.append(image)
        self.in_queue.put(image)
 
         if len(self.buffer) == self.batch_size:
-            b_keypoints, b_scores = self.__batch_call__(self.buffer)
-            for keypoints, scores in zip(b_keypoints, b_scores):
-                self.out_queue.put((keypoints, scores))
+            b_bboxes, b_keypoints, b_scores = self.__batch_call__(self.buffer)
+            for i, (keypoints, scores) in enumerate(zip(b_keypoints, b_scores)):
+                bboxes = b_bboxes[i]
+                self.out_queue.put((bboxes, keypoints, scores))
             self.buffer = []
 
-        frame, keypoints, scores = None, None, None
+        frame, bboxes, keypoints, scores = None, None, None, None
         if not self.out_queue.empty():
-            keypoints, scores = self.out_queue.get()
+            bboxes, keypoints, scores = self.out_queue.get()
             frame = self.in_queue.get()
 
-        return frame, keypoints, scores
+        return frame, bboxes, keypoints, scores
 
 
     def __init__(self,