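"""Convert an RTMO (or YOLO-NAS-Pose) ONNX pose model to mixed precision.

Uses onnxconverter_common's auto_convert_mixed_precision_model_path to perform
the conversion, validating the converted model against the original on a test
image: the post-processed keypoints and scores must agree within the given
rtol/atol tolerances.
"""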
import numpy as np
import onnx
from onnxconverter_common import auto_mixed_precision_model_path
import argparse
from rtmo_gpu import RTMO_GPU, draw_skeleton
import cv2

# Execution providers in priority order: TensorRT (with FP16 enabled), then CUDA, then CPU.
PROVIDERS = [('TensorrtExecutionProvider', {'trt_fp16_enable': True}),
             'CUDAExecutionProvider',
             'CPUExecutionProvider']

def detect_model_input_size(model_path):
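    """Return the (height, width) of the model's 'input' tensor.

    For example, an 'input' of shape (1, 3, 640, 640) would yield (640, 640).
    """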
    model = onnx.load(model_path)
    for input_tensor in model.graph.input:
        # Assuming the input node is named 'input'
        if input_tensor.name == 'input':
            tensor_shape = input_tensor.type.tensor_type.shape
            # Extract the dimensions: (batch_size, channels, height, width)
            dims = [dim.dim_value for dim in tensor_shape.dim]
            # Replace dynamic batch size (-1 or 0) with 1
            if dims[0] < 1:
                dims[0] = 1
            return tuple(dims[2:4])  # Return (height, width)
    raise ValueError("Input node 'input' not found in the model")

def load_and_preprocess_image(image_path, preprocess=None):
    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")

    if preprocess is not None:
        image = preprocess(image)

    return image

def compare_result(res1, res2):
    keypoints1, scores1 = res1
    keypoints2, scores2 = res2

    from termcolor import colored

    header = f"({colored('x', 'blue')},{colored('y', 'green')},{colored('score', 'red')})"

    def fmt(x, y, s):
        return (f"({colored(f'{x:4.1f}', 'blue')}, "
                f"{colored(f'{y:4.1f}', 'green')}, "
                f"{colored(f'{s:5.4f}', 'red')})")

    for j, (d1, d2) in enumerate(zip(keypoints1, keypoints2)):
        print(f'Detection {j}:')
        for i, (j1, j2) in enumerate(zip(d1, d2)):
            (x1, y1), (x2, y2) = j1, j2
            s1, s2 = scores1[j][i], scores2[j][i]
            print(f'Joint-{i:2d}:')
            print(f'\tOriginal  {header} = {fmt(x1, y1, s1)}')
            print(f'\tConverted {header} = {fmt(x2, y2, s2)}')

def validate_pose(res1, res2, postprocess=None):
    if postprocess is not None:
        res1 = postprocess(res1)
        res2 = postprocess(res2)

    compare_result(res1, res2)

    # rtol/atol come from the CLI arguments parsed in __main__.
    for r1, r2 in zip(res1, res2):
        if not np.allclose(r1, r2, rtol=args.rtol, atol=args.atol):
            return False
    return True

def infer_on_image(onnx_model, model_input_size, test_image_path):
    body = RTMO_GPU(onnx_model=onnx_model,
                    model_input_size=model_input_size,
                    is_yolo_nas_pose=args.yolo_nas_pose)

    frame = cv2.imread(test_image_path)
    img_show = frame.copy()
    keypoints, scores = body(img_show)

    img_show = draw_skeleton(img_show,
                             keypoints,
                             scores,
                             kpt_thr=0.3,
                             line_width=2)
    img_show = cv2.resize(img_show, (788, 525))
    cv2.imshow(f'{args.target_model_path}', img_show)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

def main(args):
    model_input_size = detect_model_input_size(args.source_model_path)

    body = RTMO_GPU(onnx_model=args.source_model_path,
                    model_input_size=model_input_size,
                    is_yolo_nas_pose=args.yolo_nas_pose)
    
    def preprocess(image, body, is_yolo_nas_pose):
        img, _ = body.preprocess(image)

        # HWC -> CHW, then add a batch dimension: (1, 3, H, W)
        img = img.transpose(2, 0, 1)
        # YOLO-NAS-Pose expects uint8 input; RTMO expects float32
        img = np.ascontiguousarray(img, dtype=np.uint8 if is_yolo_nas_pose else np.float32)
        img = img[None, :, :, :]
        return img
    
    image = load_and_preprocess_image(args.test_image_path, lambda img: preprocess(img, body, args.yolo_nas_pose))

    input_feed = {'input': image}  # key must match the model's input tensor name

    auto_mixed_precision_model_path.auto_convert_mixed_precision_model_path(
        source_model_path=args.source_model_path,
        input_feed=input_feed,
        target_model_path=args.target_model_path,
        customized_validate_func=lambda res1, res2: validate_pose(res1, res2, body.postprocess),
        rtol=args.rtol, atol=args.atol,
        provider=PROVIDERS,
        keep_io_types=True,
        verbose=True)

    infer_on_image(args.target_model_path, model_input_size, args.test_image_path)

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Convert an ONNX model to mixed precision format.")
    parser.add_argument("source_model_path", type=str, help="Path to the source ONNX model.")
    parser.add_argument("target_model_path", type=str, help="Path where the mixed precision model will be saved.")
    parser.add_argument("test_image_path", type=str, help="Path to a test image for validating the model conversion.")
    parser.add_argument('--rtol', type=float, default=0.01, help='Relative tolerance used during validation.')
    parser.add_argument('--atol', type=float, default=0.001, help='Absolute tolerance used during validation.')
    parser.add_argument('--yolo_nas_pose', action='store_true', help='Use a YOLO-NAS-Pose model (flat format only) instead of an RTMO model.')

    args = parser.parse_args()
    
    main(args)
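
# Example invocation (script and file names are illustrative):
#   python convert_mixed_precision.py rtmo-m.onnx rtmo-m_fp16.onnx test.jpg --rtol 0.01 --atol 0.001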