Luigi committed
Commit 42892de
Parent: 15801f5

Unify batched and non-batched versions

Files changed (4)
  1. demo_batch.sh +1 -1
  2. rtmo_demo.py +32 -23
  3. rtmo_demo_batch.py +2 -2
  4. rtmo_gpu.py +37 -2
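
In short, both demos now share RTMO_GPU_Batch: the old list-taking __call__ is renamed __batch_call__, and a new buffered __call__ accepts one frame at a time. A minimal sketch of the two entry points (the model path and dummy frames below are illustrative, not from the commit):

import numpy as np
from rtmo_gpu import RTMO_GPU_Batch

# Illustrative model path; any RTMO ONNX model file should work.
body = RTMO_GPU_Batch(model='rtmo-t.fp16.onnx', batch_size=4)

frame = np.zeros((480, 640, 3), dtype=np.uint8)   # dummy BGR frame
batch = [frame.copy() for _ in range(4)]          # dummy 4-frame batch

# Batched path (rtmo_demo_batch.py): results for all frames come back at once.
batch_keypoints, batch_scores = body.__batch_call__(batch)

# Per-frame path (rtmo_demo.py): frames are buffered internally, so this
# returns (None, None) until a full batch has been inferred.
keypoints, scores = body(frame)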
demo_batch.sh CHANGED
@@ -1,2 +1,2 @@
  #!/bin/sh
- python3 rtmo_demo_batch.py ./video rtmo-t.fp16.onnx 4
+ python3 rtmo_demo.py ./video rtmo-t.fp16.onnx --batch_size 4
rtmo_demo.py CHANGED
@@ -5,7 +5,8 @@ import cv2
  from pathlib import Path
  import argparse
  import os
- from rtmo_gpu import RTMO_GPU, draw_skeleton, resize_to_fit_screen
+ from rtmo_gpu import RTMO_GPU_Batch, draw_skeleton, resize_to_fit_screen
+ from queue import Queue
 
  if __name__ == "__main__":
 
@@ -14,44 +15,52 @@ if __name__ == "__main__":
      parser.add_argument('path', type=str, help='Path to the folder containing video files (required)')
      parser.add_argument('model_path', type=str, help='Path to a RTMO ONNX (or engine) model file (required)')
      parser.add_argument('--yolo_nas_pose', action='store_true', help='Use YOLO NAS Pose (flat format only) instead of RTMO Model')
+     parser.add_argument('--batch_size', type=int, default=1, help='Input batch size for the RTMO ONNX model')
 
      # Parse the command-line arguments
      args = parser.parse_args()
 
      model = args.model_path # 'rtmo-s_8xb32-600e_body7-640x640.onnx'
 
-     body = RTMO_GPU(model=model, is_yolo_nas_pose=args.yolo_nas_pose)
+     body = RTMO_GPU_Batch(model=model, is_yolo_nas_pose=args.yolo_nas_pose, batch_size=args.batch_size)
 
      for mp4_path in Path(args.path).glob('*'):
 
          # Now, use the best.url, which is the direct video link for streaming
          cap = cv2.VideoCapture(filename=os.path.abspath(mp4_path))
-
+         in_queue = Queue(maxsize=args.batch_size)
          frame_idx = 0
-
+         s = time.time()
          while cap.isOpened():
              success, frame = cap.read()
              frame_idx += 1
+             in_queue.put(frame)
 
              if not success:
                  break
-             s = time.time()
+
              keypoints, scores = body(frame)
-             det_time = time.time() - s
-             fps = round(1.0 / det_time, 1)
-             print(f'det: {fps} FPS')
-
-             img_show = frame.copy()
-
-             # if you want to use black background instead of original image,
-             # img_show = np.zeros(img_show.shape, dtype=np.uint8)
-
-             img_show = draw_skeleton(img_show,
-                                      keypoints,
-                                      scores,
-                                      kpt_thr=0.3,
-                                      line_width=2)
-             img_show = resize_to_fit_screen(img_show, 720, 480)
-             cv2.putText(img_show, f'{fps:.1f}', (10, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 255, 0), 1, cv2.LINE_AA)
-             cv2.imshow(f'{model}', img_show)
-             cv2.waitKey(10)
+
+             if keypoints is not None:
+                 if frame_idx % args.batch_size == 0 and frame_idx:
+                     current_time = time.time()
+                     det_time = current_time - s
+                     fps = round(args.batch_size / det_time, 1)
+                     print(f'det: {fps} FPS')
+                     s = current_time
+
+                 frame = in_queue.get()
+                 img_show = frame.copy()
+
+                 # if you want to use black background instead of original image,
+                 # img_show = np.zeros(img_show.shape, dtype=np.uint8)
+
+                 img_show = draw_skeleton(img_show,
+                                          keypoints,
+                                          scores,
+                                          kpt_thr=0.3,
+                                          line_width=2)
+                 img_show = resize_to_fit_screen(img_show, 720, 480)
+                 cv2.putText(img_show, f'{fps:.1f}', (10, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 255, 0), 1, cv2.LINE_AA)
+                 cv2.imshow(f'{model}', img_show)
+                 cv2.waitKey(10)
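
Because results now trail the frames that produced them, the demo parks each raw frame in a Queue and pairs it with the next result to emerge. The same alignment pattern in isolation (a sketch with hypothetical names; estimator stands in for the RTMO_GPU_Batch instance):

from queue import Queue

def paired_results(frames, estimator, batch_size):
    """Yield (frame, keypoints, scores) tuples, matching each frame to its
    delayed result; FIFO order keeps the pairing correct."""
    in_queue = Queue(maxsize=batch_size)
    for frame in frames:
        in_queue.put(frame)                   # hold the frame until its result arrives
        keypoints, scores = estimator(frame)  # (None, None) while the batch fills
        if keypoints is not None:
            yield in_queue.get(), keypoints, scores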
rtmo_demo_batch.py CHANGED
@@ -24,7 +24,7 @@ def process_video(video_path, body_estimator, batch_size=4):
          # Process the batch when it's full
          if len(batch_frames) == batch_size:
              s = time.time()
-             batch_keypoints, batch_scores = body_estimator(batch_frames)
+             batch_keypoints, batch_scores = body_estimator.__batch_call__(batch_frames)
              det_time = time.time() - s
              fps = round(batch_size / det_time, 1)
              print(f'Batch det: {fps} FPS')
@@ -52,7 +52,7 @@ def process_video(video_path, body_estimator, batch_size=4):
 
      # Option 2: Duplicate the last frame
      batch_frames.append(batch_frames[-1])
-     batch_keypoints, batch_scores = body_estimator(batch_frames)
+     batch_keypoints, batch_scores = body_estimator.__batch_call__(batch_frames)
      for i, keypoints in enumerate(batch_keypoints):
          scores = batch_scores[i]
          frame = batch_frames[i]
rtmo_gpu.py CHANGED
@@ -3,6 +3,7 @@ import numpy as np
  from typing import List, Tuple
  import onnxruntime as ort
  import cv2
+ from queue import Queue
  os.environ['ORT_TENSORRT_EXTRA_PLUGIN_LIB_PATHS']='libmmdeploy_tensorrt_ops.so'
 
  # dictionary from https://github.com/Tau-J/rtmlib/blob/4b29101d54b611048ef165277cebfffff3030074/rtmlib/visualization/skeleton/coco17.py
@@ -458,6 +459,7 @@ class RTMO_GPU(object):
                  'cudnn_conv_algo_search': 'DEFAULT',
                  'cudnn_conv_use_max_workspace': True
              }),
+             'OpenVINOExecutionProvider',
              'CPUExecutionProvider']}
 
          self.session = ort.InferenceSession(path_or_bytes=model,
@@ -547,7 +549,7 @@ class RTMO_GPU_Batch(RTMO_GPU):
          self,
          outputs: List[np.ndarray],
          ratios: List[float]
-     ) -> List[Tuple[np.ndarray, np.ndarray]]:
+     ) -> Tuple[List[np.ndarray], List[np.ndarray]]:
          """Process outputs for a batch of images.
 
          Args:
@@ -569,11 +571,44 @@ class RTMO_GPU_Batch(RTMO_GPU):
 
          return batch_keypoints, batch_scores
 
-     def __call__(self, images: List[np.ndarray]):
+     def __batch_call__(self, images: List[np.ndarray]):
          batch_img, ratios = self.preprocess_batch(images)
          outputs = self.inference(batch_img)
          keypoints, scores = self.postprocess_batch(outputs, ratios)
          return keypoints, scores
+
+     def __call__(self, image: np.array):
+         self.buffer.append(image)
+
+         if len(self.buffer) == self.batch_size:
+             b_keypoints, b_scores = self.__batch_call__(self.buffer)
+             for keypoints, scores in zip(b_keypoints, b_scores):
+                 self.out_queue.put((keypoints, scores))
+             self.buffer = []
+
+         keypoints, scores = None, None
+         if not self.out_queue.empty():
+             keypoints, scores = self.out_queue.get()
+
+         return keypoints, scores
+
+     def __init__(self,
+                  model: str = None,
+                  mean: tuple = None,
+                  std: tuple = None,
+                  device: str = 'cuda',
+                  is_yolo_nas_pose = False,
+                  batch_size: int = 1):
+         super().__init__(model,
+                          mean,
+                          std,
+                          device,
+                          is_yolo_nas_pose)
+
+         self.batch_size = batch_size
+         self.out_queue = Queue(maxsize=self.batch_size)
+         self.buffer = []
 
  def resize_to_fit_screen(image, screen_width, screen_height):
      # Get the dimensions of the image
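
The buffered __call__ gives the per-frame API a warm-up and a fixed lag: with batch_size=4, the first three calls return (None, None), the fourth triggers inference and returns frame 0's result, and each later result trails its frame by three calls; frames still in self.buffer when a stream ends are not flushed automatically. A small timing sketch (illustrative model path, dummy frames):

import numpy as np
from rtmo_gpu import RTMO_GPU_Batch

body = RTMO_GPU_Batch(model='rtmo-t.fp16.onnx', batch_size=4)  # example model

for idx in range(8):
    frame = np.zeros((480, 640, 3), dtype=np.uint8)  # dummy BGR frame
    keypoints, scores = body(frame)
    # idx 0-2: None (buffer filling); idx 3: result for frame 0;
    # idx 4-6: results for frames 1-3 drained from out_queue;
    # idx 7: second batch fires, result for frame 4.
    print(idx, 'pending' if keypoints is None else 'result')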