File size: 2,546 Bytes
2ef3e1d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
#!/usr/bin/env python
# coding: utf-8

import os
from rknn.api import RKNN
from sys import exit
import argparse
import cv2
import numpy as np
# Work from the directory containing this script so that the relative
# model and dataset paths used below are resolved against it.
_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
os.chdir(_SCRIPT_DIR)

# Dimensions used to build the dynamic input shapes in convert_encoder():
# one shape set is generated per (batch_size, seq_length) combination.
seq_lengths = [3000]  # last dimension of both inputs
batch_sizes = [1]  # leading dimension of both inputs
mel_size = 128  # middle dimension of the first input (presumably mel bins -- confirm)

def convert_encoder():
    """Convert ``audio_encoder.onnx`` to ``audio_encoder.rknn`` for rk3588.

    Configures the RKNN toolkit with one dynamic input shape set per
    (batch_size, seq_length) combination taken from the module-level
    ``batch_sizes`` and ``seq_lengths`` lists, loads the ONNX model, builds
    it without quantization and exports the result.

    If loading, building or exporting returns a non-zero code, the process
    is terminated through ``exit(ret)`` with that code. The RKNN object is
    released in every case, including those early exits.
    """
    ONNX_MODEL = "audio_encoder.onnx"
    RKNN_MODEL = ONNX_MODEL.replace(".onnx", ".rknn")
    DATASET = "dataset.txt"
    QUANTIZE = False

    rknn = RKNN(verbose=True)
    try:
        # Each entry holds the shapes of the model's two inputs for one
        # (batch_size, seq_length) combination.
        # NOTE(review): presumably mel features plus a per-frame mask --
        # confirm against the ONNX export.
        input_shapes = [
            [[batch_size, mel_size, seq_length], [batch_size, seq_length]]
            for batch_size in batch_sizes
            for seq_length in seq_lengths
        ]
        print(input_shapes)

        # pre-process config
        print('--> Config model')
        rknn.config(
            quantized_algorithm='normal',
            quantized_method='channel',
            target_platform='rk3588',
            optimization_level=3,
            dynamic_input=input_shapes,
        )
        print('done')

        # Load ONNX model
        print("--> Loading model")
        ret = rknn.load_onnx(model=ONNX_MODEL)
        if ret != 0:
            print('Load model failed!')
            exit(ret)
        print('done')

        # Build model
        print('--> Building model')
        ret = rknn.build(
            do_quantization=QUANTIZE,
            dataset=DATASET,
            rknn_batch_size=None,
        )
        if ret != 0:
            print('Build model failed!')
            exit(ret)
        print('done')

        # export
        print('--> Export RKNN model')
        ret = rknn.export_rknn(RKNN_MODEL)
        if ret != 0:
            print('Export RKNN model failed!')
            exit(ret)
        print('done')
    finally:
        # exit() raises SystemExit, so this also runs on the failure paths
        # above and the toolkit's resources are always freed.
        rknn.release()
# Usage: python convert_rknn.py [encoder|all]   (the argument is optional; omitting it means "all")

if __name__ == "__main__":
    # Command-line entry point. The positional ``model`` argument is
    # optional and falls back to "all" when omitted.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "model",
        type=str,
        help="model to convert",
        choices=["encoder", "all"],
        nargs="?",
        default="all",
    )
    args = parser.parse_args()

    # argparse has already rejected any value outside ``choices``, so no
    # "unknown model" fallback is needed here. Both accepted values run the
    # encoder conversion, the only conversion this entry point calls.
    if args.model in ("encoder", "all"):
        convert_encoder()