jayke
committed on
Commit
·
cf64894
1
Parent(s):
ea52087
init
Browse files- model_repo_stateful/decoder/1/.gitignore +0 -0
- model_repo_stateful/decoder/1/decoder.onnx +3 -0
- model_repo_stateful/decoder/config.pbtxt +55 -0
- model_repo_stateful/decoder/config_template.pbtxt +73 -0
- model_repo_stateful/decoder/config_template2.pbtxt +68 -0
- model_repo_stateful/encoder/1/.gitignore +0 -0
- model_repo_stateful/encoder/1/encoder.onnx +3 -0
- model_repo_stateful/encoder/config.pbtxt +109 -0
- model_repo_stateful/encoder/config_template.pbtxt +122 -0
- model_repo_stateful/encoder/config_template2.pbtxt +110 -0
- model_repo_stateful/feature_extractor/1/__pycache__/model.cpython-38.pyc +0 -0
- model_repo_stateful/feature_extractor/1/model.py +277 -0
- model_repo_stateful/feature_extractor/config.pbtxt +98 -0
- model_repo_stateful/feature_extractor/config_template.pbtxt +111 -0
- model_repo_stateful/streaming_wenet/1/.gitignore +0 -0
- model_repo_stateful/streaming_wenet/config.pbtxt +102 -0
- model_repo_stateful/streaming_wenet/config_template.pbtxt +115 -0
- model_repo_stateful/wenet/1/__pycache__/model.cpython-38.pyc +0 -0
- model_repo_stateful/wenet/1/__pycache__/wenet_onnx_model.cpython-38.pyc +0 -0
- model_repo_stateful/wenet/1/model.py +180 -0
- model_repo_stateful/wenet/1/wenet_onnx_model.py +277 -0
- model_repo_stateful/wenet/config.pbtxt +126 -0
- model_repo_stateful/wenet/config_template.pbtxt +139 -0
model_repo_stateful/decoder/1/.gitignore
ADDED
File without changes
|
model_repo_stateful/decoder/1/decoder.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2272437072ed614f41591e42633e4cc3a1c63729ff490a1c3c89a789a05eb70a
|
3 |
+
size 56292294
|
model_repo_stateful/decoder/config.pbtxt
ADDED
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
name: "decoder"
|
3 |
+
backend: "onnxruntime"
|
4 |
+
default_model_filename: "decoder.onnx"
|
5 |
+
|
6 |
+
max_batch_size: 640
|
7 |
+
input [
|
8 |
+
{
|
9 |
+
name: "encoder_out"
|
10 |
+
data_type: TYPE_FP16
|
11 |
+
dims: [-1, 512] # [-1, feature_size]
|
12 |
+
},
|
13 |
+
{
|
14 |
+
name: "encoder_out_lens"
|
15 |
+
data_type: TYPE_INT32
|
16 |
+
dims: [1]
|
17 |
+
reshape: { shape: [ ] }
|
18 |
+
},
|
19 |
+
{
|
20 |
+
name: "hyps_pad_sos_eos"
|
21 |
+
data_type: TYPE_INT64
|
22 |
+
dims: [10, -1]
|
23 |
+
},
|
24 |
+
{
|
25 |
+
name: "hyps_lens_sos"
|
26 |
+
data_type: TYPE_INT32
|
27 |
+
dims: [10]
|
28 |
+
},
|
29 |
+
{
|
30 |
+
name: "ctc_score"
|
31 |
+
data_type: TYPE_FP16
|
32 |
+
dims: [10]
|
33 |
+
}
|
34 |
+
]
|
35 |
+
|
36 |
+
output [
|
37 |
+
{
|
38 |
+
name: "best_index"
|
39 |
+
data_type: TYPE_INT64
|
40 |
+
dims: [1]
|
41 |
+
reshape: { shape: [ ] }
|
42 |
+
}
|
43 |
+
]
|
44 |
+
|
45 |
+
dynamic_batching {
|
46 |
+
preferred_batch_size: [ 16, 32 ]
|
47 |
+
}
|
48 |
+
|
49 |
+
instance_group [
|
50 |
+
{
|
51 |
+
count: 2
|
52 |
+
kind: KIND_GPU
|
53 |
+
}
|
54 |
+
]
|
55 |
+
|
model_repo_stateful/decoder/config_template.pbtxt
ADDED
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
|
15 |
+
name: "decoder"
|
16 |
+
backend: "onnxruntime"
|
17 |
+
default_model_filename: "decoder.onnx"
|
18 |
+
|
19 |
+
max_batch_size: 640
|
20 |
+
input [
|
21 |
+
{
|
22 |
+
name: "encoder_out"
|
23 |
+
data_type: TYPE_#DTYPE
|
24 |
+
dims: [-1, #output_size]
|
25 |
+
},
|
26 |
+
{
|
27 |
+
name: "encoder_out_lens"
|
28 |
+
data_type: TYPE_INT32
|
29 |
+
dims: [1]
|
30 |
+
reshape: { shape: [ ] }
|
31 |
+
},
|
32 |
+
{
|
33 |
+
name: "hyps_pad_sos_eos"
|
34 |
+
data_type: TYPE_INT64
|
35 |
+
dims: [#beam_size, -1]
|
36 |
+
},
|
37 |
+
{
|
38 |
+
name: "hyps_lens_sos"
|
39 |
+
data_type: TYPE_INT32
|
40 |
+
dims: [#beam_size]
|
41 |
+
},
|
42 |
+
{
|
43 |
+
name: "r_hyps_pad_sos_eos"
|
44 |
+
data_type: TYPE_INT64
|
45 |
+
dims: [#beam_size, -1]
|
46 |
+
},
|
47 |
+
{
|
48 |
+
name: "ctc_score"
|
49 |
+
data_type: TYPE_#DTYPE
|
50 |
+
dims: [#beam_size]
|
51 |
+
}
|
52 |
+
]
|
53 |
+
|
54 |
+
output [
|
55 |
+
{
|
56 |
+
name: "best_index"
|
57 |
+
data_type: TYPE_INT64
|
58 |
+
dims: [1]
|
59 |
+
reshape: { shape: [ ] }
|
60 |
+
}
|
61 |
+
]
|
62 |
+
|
63 |
+
dynamic_batching {
|
64 |
+
preferred_batch_size: [ 16, 32 ]
|
65 |
+
}
|
66 |
+
|
67 |
+
instance_group [
|
68 |
+
{
|
69 |
+
count: 2
|
70 |
+
kind: KIND_GPU
|
71 |
+
}
|
72 |
+
]
|
73 |
+
|
model_repo_stateful/decoder/config_template2.pbtxt
ADDED
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
|
15 |
+
name: "decoder"
|
16 |
+
backend: "onnxruntime"
|
17 |
+
default_model_filename: "decoder.onnx"
|
18 |
+
|
19 |
+
max_batch_size: 640
|
20 |
+
input [
|
21 |
+
{
|
22 |
+
name: "encoder_out"
|
23 |
+
data_type: TYPE_#DTYPE
|
24 |
+
dims: [-1, #output_size] # [-1, feature_size]
|
25 |
+
},
|
26 |
+
{
|
27 |
+
name: "encoder_out_lens"
|
28 |
+
data_type: TYPE_INT32
|
29 |
+
dims: [1]
|
30 |
+
reshape: { shape: [ ] }
|
31 |
+
},
|
32 |
+
{
|
33 |
+
name: "hyps_pad_sos_eos"
|
34 |
+
data_type: TYPE_INT64
|
35 |
+
dims: [#beam_size, -1]
|
36 |
+
},
|
37 |
+
{
|
38 |
+
name: "hyps_lens_sos"
|
39 |
+
data_type: TYPE_INT32
|
40 |
+
dims: [#beam_size]
|
41 |
+
},
|
42 |
+
{
|
43 |
+
name: "ctc_score"
|
44 |
+
data_type: TYPE_#DTYPE
|
45 |
+
dims: [#beam_size]
|
46 |
+
}
|
47 |
+
]
|
48 |
+
|
49 |
+
output [
|
50 |
+
{
|
51 |
+
name: "best_index"
|
52 |
+
data_type: TYPE_INT64
|
53 |
+
dims: [1]
|
54 |
+
reshape: { shape: [ ] }
|
55 |
+
}
|
56 |
+
]
|
57 |
+
|
58 |
+
dynamic_batching {
|
59 |
+
preferred_batch_size: [ 16, 32 ]
|
60 |
+
}
|
61 |
+
|
62 |
+
instance_group [
|
63 |
+
{
|
64 |
+
count: 2
|
65 |
+
kind: KIND_GPU
|
66 |
+
}
|
67 |
+
]
|
68 |
+
|
model_repo_stateful/encoder/1/.gitignore
ADDED
File without changes
|
model_repo_stateful/encoder/1/encoder.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a9923799f94d885a0e0c798b0d63322565f48636bd67f7b95af7f8b7e4e4b0de
|
3 |
+
size 171905418
|
model_repo_stateful/encoder/config.pbtxt
ADDED
@@ -0,0 +1,109 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
name: "encoder"
|
3 |
+
backend: "onnxruntime"
|
4 |
+
default_model_filename: "encoder.onnx"
|
5 |
+
|
6 |
+
max_batch_size: 512
|
7 |
+
|
8 |
+
sequence_batching{
|
9 |
+
max_sequence_idle_microseconds: 5000000
|
10 |
+
oldest {
|
11 |
+
max_candidate_sequences: 1024
|
12 |
+
preferred_batch_size: [32, 64, 128, 256]
|
13 |
+
max_queue_delay_microseconds: 5000
|
14 |
+
}
|
15 |
+
control_input [
|
16 |
+
]
|
17 |
+
state [
|
18 |
+
{
|
19 |
+
input_name: "offset"
|
20 |
+
output_name: "r_offset"
|
21 |
+
data_type: TYPE_INT64
|
22 |
+
dims: [ 1 ]
|
23 |
+
initial_state: {
|
24 |
+
data_type: TYPE_INT64
|
25 |
+
dims: [ 1 ]
|
26 |
+
zero_data: true
|
27 |
+
name: "initial state"
|
28 |
+
}
|
29 |
+
},
|
30 |
+
{
|
31 |
+
input_name: "att_cache"
|
32 |
+
output_name: "r_att_cache"
|
33 |
+
data_type: TYPE_FP16
|
34 |
+
dims: [ 12, 8, 80, 128 ]
|
35 |
+
initial_state: {
|
36 |
+
data_type: TYPE_FP16
|
37 |
+
dims: [ 12, 8, 80, 128 ]
|
38 |
+
zero_data: true
|
39 |
+
name: "initial state"
|
40 |
+
}
|
41 |
+
},
|
42 |
+
{
|
43 |
+
input_name: "cnn_cache"
|
44 |
+
output_name: "r_cnn_cache"
|
45 |
+
data_type: TYPE_FP16
|
46 |
+
dims: [12, 512, 14]
|
47 |
+
initial_state: {
|
48 |
+
data_type: TYPE_FP16
|
49 |
+
dims: [12, 512, 14]
|
50 |
+
zero_data: true
|
51 |
+
name: "initial state"
|
52 |
+
}
|
53 |
+
},
|
54 |
+
{
|
55 |
+
input_name: "cache_mask"
|
56 |
+
output_name: "r_cache_mask"
|
57 |
+
data_type: TYPE_FP16
|
58 |
+
dims: [1, 80]
|
59 |
+
initial_state: {
|
60 |
+
data_type: TYPE_FP16
|
61 |
+
dims: [1, 80]
|
62 |
+
zero_data: true
|
63 |
+
name: "initial state"
|
64 |
+
}
|
65 |
+
}
|
66 |
+
]
|
67 |
+
}
|
68 |
+
input [
|
69 |
+
{
|
70 |
+
name: "chunk_xs"
|
71 |
+
data_type: TYPE_FP16
|
72 |
+
dims: [67, 80]
|
73 |
+
},
|
74 |
+
{
|
75 |
+
name: "chunk_lens"
|
76 |
+
data_type: TYPE_INT32
|
77 |
+
dims: [ 1 ]
|
78 |
+
reshape: { shape: [] }
|
79 |
+
}
|
80 |
+
]
|
81 |
+
output [
|
82 |
+
{
|
83 |
+
name: "log_probs"
|
84 |
+
data_type: TYPE_FP16
|
85 |
+
dims: [-1, 10] # [-1, beam_size]
|
86 |
+
},
|
87 |
+
{
|
88 |
+
name: "log_probs_idx"
|
89 |
+
data_type: TYPE_INT64
|
90 |
+
dims: [-1, 10] # [-1, beam_size]
|
91 |
+
},
|
92 |
+
{
|
93 |
+
name: "chunk_out"
|
94 |
+
data_type: TYPE_FP16
|
95 |
+
dims: [-1, -1]
|
96 |
+
},
|
97 |
+
{
|
98 |
+
name: "chunk_out_lens"
|
99 |
+
data_type: TYPE_INT32
|
100 |
+
dims: [1]
|
101 |
+
reshape: { shape: [] }
|
102 |
+
}
|
103 |
+
]
|
104 |
+
instance_group [
|
105 |
+
{
|
106 |
+
count: 2
|
107 |
+
kind: KIND_GPU
|
108 |
+
}
|
109 |
+
]
|
model_repo_stateful/encoder/config_template.pbtxt
ADDED
@@ -0,0 +1,122 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
|
15 |
+
name: "encoder"
|
16 |
+
backend: "onnxruntime"
|
17 |
+
default_model_filename: "encoder.onnx"
|
18 |
+
|
19 |
+
max_batch_size: 512
|
20 |
+
|
21 |
+
sequence_batching{
|
22 |
+
max_sequence_idle_microseconds: 5000000
|
23 |
+
oldest {
|
24 |
+
max_candidate_sequences: 1024
|
25 |
+
preferred_batch_size: [32, 64, 128, 256]
|
26 |
+
max_queue_delay_microseconds: 5000
|
27 |
+
}
|
28 |
+
control_input [
|
29 |
+
]
|
30 |
+
state [
|
31 |
+
{
|
32 |
+
input_name: "offset"
|
33 |
+
output_name: "r_offset"
|
34 |
+
data_type: TYPE_INT64
|
35 |
+
dims: [ 1 ]
|
36 |
+
initial_state: {
|
37 |
+
data_type: TYPE_INT64
|
38 |
+
dims: [ 1 ]
|
39 |
+
zero_data: true
|
40 |
+
name: "initial state"
|
41 |
+
}
|
42 |
+
},
|
43 |
+
{
|
44 |
+
input_name: "att_cache"
|
45 |
+
output_name: "r_att_cache"
|
46 |
+
data_type: TYPE_#DTYPE
|
47 |
+
dims: [ #num_layers, #num_head, #cache_size, #att_cache_output_size ]
|
48 |
+
initial_state: {
|
49 |
+
data_type: TYPE_#DTYPE
|
50 |
+
dims: [ #num_layers, #num_head, #cache_size, #att_cache_output_size ]
|
51 |
+
zero_data: true
|
52 |
+
name: "initial state"
|
53 |
+
}
|
54 |
+
},
|
55 |
+
{
|
56 |
+
input_name: "cnn_cache"
|
57 |
+
output_name: "r_cnn_cache"
|
58 |
+
data_type: TYPE_#DTYPE
|
59 |
+
dims: [#num_layers, #output_size, #cnn_module_cache]
|
60 |
+
initial_state: {
|
61 |
+
data_type: TYPE_#DTYPE
|
62 |
+
dims: [#num_layers, #output_size, #cnn_module_cache]
|
63 |
+
zero_data: true
|
64 |
+
name: "initial state"
|
65 |
+
}
|
66 |
+
},
|
67 |
+
{
|
68 |
+
input_name: "cache_mask"
|
69 |
+
output_name: "r_cache_mask"
|
70 |
+
data_type: TYPE_#DTYPE
|
71 |
+
dims: [1, #cache_size]
|
72 |
+
initial_state: {
|
73 |
+
data_type: TYPE_#DTYPE
|
74 |
+
dims: [1, #cache_size]
|
75 |
+
zero_data: true
|
76 |
+
name: "initial state"
|
77 |
+
}
|
78 |
+
}
|
79 |
+
]
|
80 |
+
}
|
81 |
+
input [
|
82 |
+
{
|
83 |
+
name: "chunk_xs"
|
84 |
+
data_type: TYPE_#DTYPE
|
85 |
+
dims: [#decoding_window, #num_mel_bins]
|
86 |
+
},
|
87 |
+
{
|
88 |
+
name: "chunk_lens"
|
89 |
+
data_type: TYPE_INT32
|
90 |
+
dims: [ 1 ]
|
91 |
+
reshape: { shape: [] }
|
92 |
+
}
|
93 |
+
]
|
94 |
+
output [
|
95 |
+
{
|
96 |
+
name: "log_probs"
|
97 |
+
data_type: TYPE_#DTYPE
|
98 |
+
dims: [-1, #beam_size] # [-1, beam_size]
|
99 |
+
},
|
100 |
+
{
|
101 |
+
name: "log_probs_idx"
|
102 |
+
data_type: TYPE_INT64
|
103 |
+
dims: [-1, #beam_size] # [-1, beam_size]
|
104 |
+
},
|
105 |
+
{
|
106 |
+
name: "chunk_out"
|
107 |
+
data_type: TYPE_#DTYPE
|
108 |
+
dims: [-1, #encoder_output_size]
|
109 |
+
},
|
110 |
+
{
|
111 |
+
name: "chunk_out_lens"
|
112 |
+
data_type: TYPE_INT32
|
113 |
+
dims: [1]
|
114 |
+
reshape: { shape: [] }
|
115 |
+
}
|
116 |
+
]
|
117 |
+
instance_group [
|
118 |
+
{
|
119 |
+
count: 2
|
120 |
+
kind: KIND_GPU
|
121 |
+
}
|
122 |
+
]
|
model_repo_stateful/encoder/config_template2.pbtxt
ADDED
@@ -0,0 +1,110 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
|
15 |
+
name: "encoder"
|
16 |
+
backend: "onnxruntime"
|
17 |
+
default_model_filename: "encoder.onnx"
|
18 |
+
|
19 |
+
max_batch_size: 512
|
20 |
+
|
21 |
+
sequence_batching{
|
22 |
+
max_sequence_idle_microseconds: 5000000
|
23 |
+
oldest {
|
24 |
+
max_candidate_sequences: 1024
|
25 |
+
preferred_batch_size: [32, 64, 128, 256]
|
26 |
+
max_queue_delay_microseconds: 5000
|
27 |
+
}
|
28 |
+
control_input [
|
29 |
+
]
|
30 |
+
state [
|
31 |
+
{
|
32 |
+
input_name: "offset"
|
33 |
+
output_name: "r_offset"
|
34 |
+
data_type: TYPE_INT64
|
35 |
+
dims: [ 1 ]
|
36 |
+
initial_state: {
|
37 |
+
data_type: TYPE_INT64
|
38 |
+
dims: [ 1 ]
|
39 |
+
zero_data: true
|
40 |
+
name: "initial state"
|
41 |
+
}
|
42 |
+
},
|
43 |
+
{
|
44 |
+
input_name: "att_cache"
|
45 |
+
output_name: "r_att_cache"
|
46 |
+
data_type: TYPE_#DTYPE
|
47 |
+
dims: [ #num_layers, #num_head, #cache_size, #att_cache_output_size ]
|
48 |
+
initial_state: {
|
49 |
+
data_type: TYPE_#DTYPE
|
50 |
+
dims: [ #num_layers, #num_head, #cache_size, #att_cache_output_size ]
|
51 |
+
zero_data: true
|
52 |
+
name: "initial state"
|
53 |
+
}
|
54 |
+
},
|
55 |
+
{
|
56 |
+
input_name: "cache_mask"
|
57 |
+
output_name: "r_cache_mask"
|
58 |
+
data_type: TYPE_#DTYPE
|
59 |
+
dims: [1, #cache_size]
|
60 |
+
initial_state: {
|
61 |
+
data_type: TYPE_#DTYPE
|
62 |
+
dims: [1, #cache_size]
|
63 |
+
zero_data: true
|
64 |
+
name: "initial state"
|
65 |
+
}
|
66 |
+
}
|
67 |
+
]
|
68 |
+
}
|
69 |
+
input [
|
70 |
+
{
|
71 |
+
name: "chunk_xs"
|
72 |
+
data_type: TYPE_#DTYPE
|
73 |
+
dims: [#decoding_window, #num_mel_bins]
|
74 |
+
},
|
75 |
+
{
|
76 |
+
name: "chunk_lens"
|
77 |
+
data_type: TYPE_INT32
|
78 |
+
dims: [ 1 ]
|
79 |
+
reshape: { shape: [] }
|
80 |
+
}
|
81 |
+
]
|
82 |
+
output [
|
83 |
+
{
|
84 |
+
name: "log_probs"
|
85 |
+
data_type: TYPE_#DTYPE
|
86 |
+
dims: [-1, #beam_size] # [-1, beam_size]
|
87 |
+
},
|
88 |
+
{
|
89 |
+
name: "log_probs_idx"
|
90 |
+
data_type: TYPE_INT64
|
91 |
+
dims: [-1, #beam_size] # [-1, beam_size]
|
92 |
+
},
|
93 |
+
{
|
94 |
+
name: "chunk_out"
|
95 |
+
data_type: TYPE_#DTYPE
|
96 |
+
dims: [-1, #encoder_output_size]
|
97 |
+
},
|
98 |
+
{
|
99 |
+
name: "chunk_out_lens"
|
100 |
+
data_type: TYPE_INT32
|
101 |
+
dims: [1]
|
102 |
+
reshape: { shape: [] }
|
103 |
+
}
|
104 |
+
]
|
105 |
+
instance_group [
|
106 |
+
{
|
107 |
+
count: 2
|
108 |
+
kind: KIND_GPU
|
109 |
+
}
|
110 |
+
]
|
model_repo_stateful/feature_extractor/1/__pycache__/model.cpython-38.pyc
ADDED
Binary file (7.7 kB). View file
|
|
model_repo_stateful/feature_extractor/1/model.py
ADDED
@@ -0,0 +1,277 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
|
15 |
+
import triton_python_backend_utils as pb_utils
|
16 |
+
from torch.utils.dlpack import from_dlpack
|
17 |
+
import torch
|
18 |
+
import kaldifeat
|
19 |
+
import _kaldifeat
|
20 |
+
from typing import List
|
21 |
+
import json
|
22 |
+
import numpy as np
|
23 |
+
|
24 |
+
|
25 |
+
class Fbank(torch.nn.Module):
    """Thin ``torch.nn.Module`` wrapper around a kaldifeat Fbank computer."""

    def __init__(self, opts):
        super().__init__()
        # The underlying kaldifeat extractor, configured by `opts`
        # (a kaldifeat.FbankOptions instance built by the caller).
        self.fbank = kaldifeat.Fbank(opts)

    def forward(self, waves: List[torch.Tensor]):
        """Compute fbank features for a batch of 1-D waveform tensors."""
        return self.fbank(waves)
|
32 |
+
|
33 |
+
|
34 |
+
class Feat(object):
    """Per-sequence streaming buffer for raw audio and extracted feature frames.

    One instance is kept per Triton sequence (correlation id). Incoming wav
    chunks are appended with ``add_wavs``; ``get_seg_wav`` hands back the
    buffered audio while retaining the last ``offset`` samples so that frame
    extraction across chunk boundaries stays continuous. Extracted frames are
    accumulated with ``add_frames`` and consumed in fixed-size windows via
    ``get_frames``.
    """

    def __init__(
        self, seqid, offset_ms, sample_rate, first_chunk_sz, frame_stride, device="cpu"
    ):
        """
        Args:
            seqid: sequence (correlation) id this buffer belongs to.
            offset_ms: audio overlap kept between consecutive segments, in ms.
            sample_rate: audio sample rate in Hz.
            first_chunk_sz: minimum number of samples required in the very
                first chunk of a sequence.
            frame_stride: number of frames dropped from the front of the
                frame buffer each time a window is consumed.
            device: torch device string for the internal buffers.
        """
        self.seqid = seqid
        self.sample_rate = sample_rate
        self.wav = torch.tensor([], device=device)
        # Overlap, converted from milliseconds to a sample count.
        self.offset = int(offset_ms / 1000 * sample_rate)
        self.frames = None
        self.frame_stride = int(frame_stride)
        self.first_chunk_sz = first_chunk_sz
        self.device = device

    def add_wavs(self, wav: torch.Tensor):
        """Append a chunk of raw audio samples to the wav buffer.

        Raises:
            ValueError: if the very first chunk of the sequence is shorter
                than ``first_chunk_sz`` samples.
        """
        if len(self.wav) == 0 and len(wav) < self.first_chunk_sz:
            raise ValueError("Invalid first chunk size", len(wav))
        wav = wav.to(self.device)
        self.wav = torch.cat([self.wav, wav], dim=0)

    def get_seg_wav(self):
        """Return all buffered audio and keep only the trailing overlap.

        Keeps exactly the last ``offset`` samples for the next segment.
        (Using ``self.wav[-self.offset:]`` directly would keep the whole
        buffer when ``offset`` is 0, so the start index is computed
        explicitly.)
        """
        seg = self.wav[:]
        self.wav = self.wav[len(self.wav) - self.offset :]
        return seg

    def add_frames(self, frames: torch.Tensor):
        """Append extracted feature frames (seq_len x feat_sz) to the buffer."""
        if self.frames is None:
            self.frames = frames
        else:
            self.frames = torch.cat([self.frames, frames], dim=0)

    def get_frames(self, num_frames: int):
        """Return up to ``num_frames`` frames, then advance by ``frame_stride``."""
        seg = self.frames[0:num_frames]
        self.frames = self.frames[self.frame_stride :]
        return seg
|
71 |
+
|
72 |
+
|
73 |
+
class TritonPythonModel:
    """Your Python model must use the same class name. Every Python model
    that is created must have "TritonPythonModel" as the class name.
    """

    def initialize(self, args):
        """`initialize` is called only once when the model is being loaded.
        Implementing `initialize` function is optional. This function allows
        the model to initialize any state associated with this model.
        Parameters
        ----------
        args : dict
          Both keys and values are strings. The dictionary keys and values are:
          * model_config: A JSON string containing the model configuration
          * model_instance_kind: A string containing model instance kind
          * model_instance_device_id: A string containing model instance device ID
          * model_repository: Model repository path
          * model_version: Model version
          * model_name: Model name
        """
        self.model_config = model_config = json.loads(args["model_config"])
        self.max_batch_size = max(model_config["max_batch_size"], 1)

        # Pick the compute device from the first instance_group entry.
        if "GPU" in model_config["instance_group"][0]["kind"]:
            self.device = "cuda"
        else:
            self.device = "cpu"

        # Get OUTPUT0 configuration
        output0_config = pb_utils.get_output_config_by_name(model_config, "speech")
        # Convert Triton types to numpy types
        self.output0_dtype = pb_utils.triton_string_to_numpy(
            output0_config["data_type"]
        )

        # Mirror the configured output dtype in torch (FP32, otherwise FP16).
        if self.output0_dtype == np.float32:
            self.dtype = torch.float32
        else:
            self.dtype = torch.float16

        # Output dims are [decoding_window, feature_size] per the "speech"
        # output config (last two entries of "dims").
        self.feature_size = output0_config["dims"][-1]
        self.decoding_window = output0_config["dims"][-2]
        # Get OUTPUT1 configuration
        output1_config = pb_utils.get_output_config_by_name(
            model_config, "speech_lengths"
        )
        # Convert Triton types to numpy types
        self.output1_dtype = pb_utils.triton_string_to_numpy(
            output1_config["data_type"]
        )

        # Feature-extraction parameters from the config's `parameters` map,
        # falling back to the defaults in parse_model_params().
        feat_opt = self.parse_model_params(model_config["parameters"])

        opts = kaldifeat.FbankOptions()
        opts.frame_opts.dither = 0
        opts.mel_opts.num_bins = self.feature_size
        frame_length_ms = feat_opt["frame_length_ms"]
        frame_shift_ms = feat_opt["frame_shift_ms"]
        opts.frame_opts.frame_length_ms = frame_length_ms
        opts.frame_opts.frame_shift_ms = frame_shift_ms
        opts.frame_opts.samp_freq = feat_opt["sample_rate"]
        opts.device = torch.device(self.device)
        self.opts = opts
        self.feature_extractor = Fbank(self.opts)
        # Per-sequence Feat buffers, keyed by correlation id.
        self.seq_feat = {}
        chunk_size_s = feat_opt["chunk_size_s"]
        sample_rate = feat_opt["sample_rate"]
        # Chunk size in samples; frame stride in frames per chunk.
        self.chunk_size = int(chunk_size_s * sample_rate)
        self.frame_stride = (chunk_size_s * 1000) // frame_shift_ms

        # Grow the first chunk until it yields at least `decoding_window`
        # frames, stepping one frame shift (in samples) at a time.
        first_chunk_size = int(self.chunk_size)
        cur_frames = _kaldifeat.num_frames(first_chunk_size, opts.frame_opts)
        while cur_frames < self.decoding_window:
            first_chunk_size += frame_shift_ms * sample_rate // 1000
            cur_frames = _kaldifeat.num_frames(first_chunk_size, opts.frame_opts)
        # self.pad_silence = first_chunk_size - self.chunk_size
        self.first_chunk_size = first_chunk_size
        self.offset_ms = self.get_offset(frame_length_ms, frame_shift_ms)
        self.sample_rate = sample_rate
        # Minimum segment length in samples (one full frame of audio).
        self.min_seg = frame_length_ms * sample_rate // 1000
        print("MIN SEG IS", self.min_seg)

    def get_offset(self, frame_length_ms, frame_shift_ms):
        """Return the largest multiple of frame_shift_ms strictly below
        frame_length_ms — the audio overlap (ms) to keep between segments."""
        offset_ms = 0
        while offset_ms + frame_shift_ms < frame_length_ms:
            offset_ms += frame_shift_ms
        return offset_ms

    def parse_model_params(self, model_params):
        """Parse the Triton `parameters` map into a typed dict, coercing each
        string_value to the type of its default below; unknown keys ignored."""
        model_p = {
            "frame_length_ms": 25,
            "frame_shift_ms": 10,
            "sample_rate": 16000,
            "chunk_size_s": 0.64,
        }
        # get parameter configurations
        for li in model_params.items():
            key, value = li
            true_value = value["string_value"]
            if key not in model_p:
                continue
            key_type = type(model_p[key])
            if key_type == type(None):
                model_p[key] = true_value
            else:
                model_p[key] = key_type(true_value)
        return model_p

    def execute(self, requests):
        """`execute` must be implemented in every Python model. `execute`
        function receives a list of pb_utils.InferenceRequest as the only
        argument. This function is called when an inference is requested
        for this model.
        Parameters
        ----------
        requests : list
          A list of pb_utils.InferenceRequest
        Returns
        -------
        list
          A list of pb_utils.InferenceResponse. The length of this list must
          be the same as `requests`
        """
        total_waves = []
        responses = []
        batch_seqid = []
        end_seqid = {}
        for request in requests:
            input0 = pb_utils.get_input_tensor_by_name(request, "wav")
            # wavs = input0.as_numpy()[0]
            # Zero-copy view of the wav input; [0] drops the batch dim.
            wavs = from_dlpack(input0.to_dlpack())[0]

            input1 = pb_utils.get_input_tensor_by_name(request, "wav_lens")
            # wav_lens = input1.as_numpy()[0][0]
            wav_lens = from_dlpack(input1.to_dlpack())[0]
            # Sequence-batching control tensors (see config.pbtxt control_input).
            in_start = pb_utils.get_input_tensor_by_name(request, "START")
            start = in_start.as_numpy()[0][0]
            in_ready = pb_utils.get_input_tensor_by_name(request, "READY")
            ready = in_ready.as_numpy()[0][0]
            in_corrid = pb_utils.get_input_tensor_by_name(request, "CORRID")
            corrid = in_corrid.as_numpy()[0][0]
            in_end = pb_utils.get_input_tensor_by_name(request, "END")
            end = in_end.as_numpy()[0][0]
            # NOTE(review): debug print on the serving hot path — consider
            # removing or switching to logging before production.
            print(wavs.size(), wav_lens, ready, start, corrid, end)
            if start:
                # First request of a sequence: allocate its streaming buffer.
                self.seq_feat[corrid] = Feat(
                    corrid,
                    self.offset_ms,
                    self.sample_rate,
                    self.first_chunk_size,
                    self.frame_stride,
                    self.device,
                )
            if ready:
                self.seq_feat[corrid].add_wavs(wavs[0:wav_lens])

            batch_seqid.append(corrid)
            if end:
                end_seqid[corrid] = 1

            # if not start
            # check chunk ms size

            # Scale float audio to 16-bit range expected by kaldifeat.
            wav = self.seq_feat[corrid].get_seg_wav() * 32768
            if len(wav) < self.min_seg:
                # Zero-pad segments shorter than one frame of audio.
                temp = torch.zeros(
                    self.min_seg, dtype=torch.float32, device=self.device
                )
                temp[0 : len(wav)] = wav[:]
                wav = temp
            total_waves.append(wav)

        # One batched fbank call over all sequences in this batch.
        features = self.feature_extractor(total_waves)

        batch_size = len(batch_seqid)
        batch_speech = torch.zeros(
            (batch_size, self.decoding_window, self.feature_size), dtype=self.dtype
        )
        batch_speech_lens = torch.zeros((batch_size, 1), dtype=torch.int32)
        i = 0
        for corrid, frames in zip(batch_seqid, features):
            self.seq_feat[corrid].add_frames(frames)
            r_frames = self.seq_feat[corrid].get_frames(self.decoding_window)
            # Slices of the batch tensors; writes below fill them in place.
            speech = batch_speech[i : i + 1]
            speech_lengths = batch_speech_lens[i : i + 1]
            i += 1
            speech_lengths[0] = r_frames.size(0)
            speech[0][0 : r_frames.size(0)] = r_frames.to(speech.device)
            # out_tensor0 = pb_utils.Tensor.from_dlpack("speech", to_dlpack(speech))
            # out_tensor1 = pb_utils.Tensor.from_dlpack("speech_lengths",
            # to_dlpack(speech_lengths))
            out_tensor0 = pb_utils.Tensor("speech", speech.numpy())
            out_tensor1 = pb_utils.Tensor("speech_lengths", speech_lengths.numpy())
            output_tensors = [out_tensor0, out_tensor1]
            response = pb_utils.InferenceResponse(output_tensors=output_tensors)
            responses.append(response)
            if corrid in end_seqid:
                # Sequence finished: release its per-sequence state.
                del self.seq_feat[corrid]
            # NOTE(review): prints full feature arrays per request — very
            # verbose; consider removing before production.
            print(
                f"feature extractor results: corrid is {corrid}, speech is {speech.numpy()}, speech_lengths is {speech_lengths.numpy()}"
            )
        return responses

    def finalize(self):
        # Called once at model unload; no state needs explicit cleanup.
        print("Remove feature extractor!")
|
model_repo_stateful/feature_extractor/config.pbtxt
ADDED
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
name: "feature_extractor"
|
3 |
+
backend: "python"
|
4 |
+
max_batch_size: 512
|
5 |
+
|
6 |
+
parameters [
|
7 |
+
{
|
8 |
+
key: "frame_length_ms",
|
9 |
+
value: { string_value: "25" }
|
10 |
+
},
|
11 |
+
{
|
12 |
+
key: "frame_shift_ms"
|
13 |
+
value: { string_value: "10" }
|
14 |
+
},
|
15 |
+
{
|
16 |
+
key: "sample_rate"
|
17 |
+
value: { string_value: "16000" }
|
18 |
+
},
|
19 |
+
{
|
20 |
+
key: "chunk_size_s",
|
21 |
+
value: { string_value: "0.64" }
|
22 |
+
}
|
23 |
+
]
|
24 |
+
sequence_batching{
|
25 |
+
max_sequence_idle_microseconds: 5000000
|
26 |
+
oldest {
|
27 |
+
max_candidate_sequences: 512
|
28 |
+
preferred_batch_size: [ 32, 64, 128, 256]
|
29 |
+
}
|
30 |
+
control_input [
|
31 |
+
{
|
32 |
+
name: "START",
|
33 |
+
control [
|
34 |
+
{
|
35 |
+
kind: CONTROL_SEQUENCE_START
|
36 |
+
fp32_false_true: [0, 1]
|
37 |
+
}
|
38 |
+
]
|
39 |
+
},
|
40 |
+
{
|
41 |
+
name: "READY"
|
42 |
+
control [
|
43 |
+
{
|
44 |
+
kind: CONTROL_SEQUENCE_READY
|
45 |
+
fp32_false_true: [0, 1]
|
46 |
+
}
|
47 |
+
]
|
48 |
+
},
|
49 |
+
{
|
50 |
+
name: "CORRID",
|
51 |
+
control [
|
52 |
+
{
|
53 |
+
kind: CONTROL_SEQUENCE_CORRID
|
54 |
+
data_type: TYPE_UINT64
|
55 |
+
}
|
56 |
+
]
|
57 |
+
},
|
58 |
+
{
|
59 |
+
name: "END",
|
60 |
+
control [
|
61 |
+
{
|
62 |
+
kind: CONTROL_SEQUENCE_END
|
63 |
+
fp32_false_true: [0, 1]
|
64 |
+
}
|
65 |
+
]
|
66 |
+
}
|
67 |
+
]
|
68 |
+
}
|
69 |
+
input [
|
70 |
+
{
|
71 |
+
name: "wav"
|
72 |
+
data_type: TYPE_FP32
|
73 |
+
dims: [-1]
|
74 |
+
},
|
75 |
+
{
|
76 |
+
name: "wav_lens"
|
77 |
+
data_type: TYPE_INT32
|
78 |
+
dims: [1]
|
79 |
+
}
|
80 |
+
]
|
81 |
+
output [
|
82 |
+
{
|
83 |
+
name: "speech"
|
84 |
+
data_type: TYPE_FP16 # FP32
|
85 |
+
dims: [67, 80]
|
86 |
+
},
|
87 |
+
{
|
88 |
+
name: "speech_lengths"
|
89 |
+
data_type: TYPE_INT32
|
90 |
+
dims: [1]
|
91 |
+
}
|
92 |
+
]
|
93 |
+
instance_group [
|
94 |
+
{
|
95 |
+
count: 2
|
96 |
+
kind: KIND_GPU
|
97 |
+
}
|
98 |
+
]
|
model_repo_stateful/feature_extractor/config_template.pbtxt
ADDED
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
|
15 |
+
name: "feature_extractor"
|
16 |
+
backend: "python"
|
17 |
+
max_batch_size: 512
|
18 |
+
|
19 |
+
parameters [
|
20 |
+
{
|
21 |
+
key: "frame_length_ms",
|
22 |
+
value: { string_value: "#frame_length" }
|
23 |
+
},
|
24 |
+
{
|
25 |
+
key: "frame_shift_ms"
|
26 |
+
value: { string_value: "#frame_shift" }
|
27 |
+
},
|
28 |
+
{
|
29 |
+
key: "sample_rate"
|
30 |
+
value: { string_value: "#sample_rate" }
|
31 |
+
},
|
32 |
+
{
|
33 |
+
key: "chunk_size_s",
|
34 |
+
value: { string_value: "#chunk_size_in_seconds" }
|
35 |
+
}
|
36 |
+
]
|
37 |
+
sequence_batching{
|
38 |
+
max_sequence_idle_microseconds: 5000000
|
39 |
+
oldest {
|
40 |
+
max_candidate_sequences: 512
|
41 |
+
preferred_batch_size: [ 32, 64, 128, 256]
|
42 |
+
}
|
43 |
+
control_input [
|
44 |
+
{
|
45 |
+
name: "START",
|
46 |
+
control [
|
47 |
+
{
|
48 |
+
kind: CONTROL_SEQUENCE_START
|
49 |
+
fp32_false_true: [0, 1]
|
50 |
+
}
|
51 |
+
]
|
52 |
+
},
|
53 |
+
{
|
54 |
+
name: "READY"
|
55 |
+
control [
|
56 |
+
{
|
57 |
+
kind: CONTROL_SEQUENCE_READY
|
58 |
+
fp32_false_true: [0, 1]
|
59 |
+
}
|
60 |
+
]
|
61 |
+
},
|
62 |
+
{
|
63 |
+
name: "CORRID",
|
64 |
+
control [
|
65 |
+
{
|
66 |
+
kind: CONTROL_SEQUENCE_CORRID
|
67 |
+
data_type: TYPE_UINT64
|
68 |
+
}
|
69 |
+
]
|
70 |
+
},
|
71 |
+
{
|
72 |
+
name: "END",
|
73 |
+
control [
|
74 |
+
{
|
75 |
+
kind: CONTROL_SEQUENCE_END
|
76 |
+
fp32_false_true: [0, 1]
|
77 |
+
}
|
78 |
+
]
|
79 |
+
}
|
80 |
+
]
|
81 |
+
}
|
82 |
+
input [
|
83 |
+
{
|
84 |
+
name: "wav"
|
85 |
+
data_type: TYPE_FP32
|
86 |
+
dims: [-1]
|
87 |
+
},
|
88 |
+
{
|
89 |
+
name: "wav_lens"
|
90 |
+
data_type: TYPE_INT32
|
91 |
+
dims: [1]
|
92 |
+
}
|
93 |
+
]
|
94 |
+
output [
|
95 |
+
{
|
96 |
+
name: "speech"
|
97 |
+
data_type: TYPE_#DTYPE # FP32
|
98 |
+
dims: [#decoding_window, #num_mel_bins]
|
99 |
+
},
|
100 |
+
{
|
101 |
+
name: "speech_lengths"
|
102 |
+
data_type: TYPE_INT32
|
103 |
+
dims: [1]
|
104 |
+
}
|
105 |
+
]
|
106 |
+
instance_group [
|
107 |
+
{
|
108 |
+
count: 2
|
109 |
+
kind: KIND_GPU
|
110 |
+
}
|
111 |
+
]
|
model_repo_stateful/streaming_wenet/1/.gitignore
ADDED
File without changes
|
model_repo_stateful/streaming_wenet/config.pbtxt
ADDED
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
name: "streaming_wenet"
|
3 |
+
platform: "ensemble"
|
4 |
+
max_batch_size: 512 #MAX_BATCH
|
5 |
+
|
6 |
+
input [
|
7 |
+
{
|
8 |
+
name: "WAV"
|
9 |
+
data_type: TYPE_FP32
|
10 |
+
dims: [-1]
|
11 |
+
},
|
12 |
+
{
|
13 |
+
name: "WAV_LENS"
|
14 |
+
data_type: TYPE_INT32
|
15 |
+
dims: [1]
|
16 |
+
}
|
17 |
+
]
|
18 |
+
|
19 |
+
output [
|
20 |
+
{
|
21 |
+
name: "TRANSCRIPTS"
|
22 |
+
data_type: TYPE_STRING
|
23 |
+
dims: [1]
|
24 |
+
}
|
25 |
+
]
|
26 |
+
|
27 |
+
ensemble_scheduling {
|
28 |
+
step [
|
29 |
+
{
|
30 |
+
model_name: "feature_extractor"
|
31 |
+
model_version: -1
|
32 |
+
input_map {
|
33 |
+
key: "wav"
|
34 |
+
value: "WAV"
|
35 |
+
}
|
36 |
+
input_map {
|
37 |
+
key: "wav_lens"
|
38 |
+
value: "WAV_LENS"
|
39 |
+
}
|
40 |
+
output_map {
|
41 |
+
key: "speech"
|
42 |
+
value: "SPEECH"
|
43 |
+
}
|
44 |
+
output_map {
|
45 |
+
key: "speech_lengths"
|
46 |
+
value: "SPEECH_LENGTHS"
|
47 |
+
}
|
48 |
+
},
|
49 |
+
{
|
50 |
+
model_name: "encoder"
|
51 |
+
model_version: -1
|
52 |
+
input_map {
|
53 |
+
key: "chunk_xs"
|
54 |
+
value: "SPEECH"
|
55 |
+
}
|
56 |
+
input_map {
|
57 |
+
key: "chunk_lens"
|
58 |
+
value: "SPEECH_LENGTHS"
|
59 |
+
}
|
60 |
+
output_map {
|
61 |
+
key: "log_probs"
|
62 |
+
value: "LOG_PROBS"
|
63 |
+
}
|
64 |
+
output_map {
|
65 |
+
key: "log_probs_idx"
|
66 |
+
value: "LOG_PROBS_IDX"
|
67 |
+
}
|
68 |
+
output_map {
|
69 |
+
key: "chunk_out"
|
70 |
+
value: "CHUNK_OUT"
|
71 |
+
}
|
72 |
+
output_map {
|
73 |
+
key: "chunk_out_lens"
|
74 |
+
value: "CHUNK_OUT_LENS"
|
75 |
+
}
|
76 |
+
},
|
77 |
+
{
|
78 |
+
model_name: "wenet"
|
79 |
+
model_version: -1
|
80 |
+
input_map {
|
81 |
+
key: "log_probs"
|
82 |
+
value: "LOG_PROBS"
|
83 |
+
}
|
84 |
+
input_map {
|
85 |
+
key: "log_probs_idx"
|
86 |
+
value: "LOG_PROBS_IDX"
|
87 |
+
}
|
88 |
+
input_map {
|
89 |
+
key: "chunk_out"
|
90 |
+
value: "CHUNK_OUT"
|
91 |
+
}
|
92 |
+
input_map {
|
93 |
+
key: "chunk_out_lens"
|
94 |
+
value: "CHUNK_OUT_LENS"
|
95 |
+
}
|
96 |
+
output_map {
|
97 |
+
key: "OUTPUT0"
|
98 |
+
value: "TRANSCRIPTS"
|
99 |
+
}
|
100 |
+
}
|
101 |
+
]
|
102 |
+
}
|
model_repo_stateful/streaming_wenet/config_template.pbtxt
ADDED
@@ -0,0 +1,115 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
|
15 |
+
name: "streaming_wenet"
|
16 |
+
platform: "ensemble"
|
17 |
+
max_batch_size: 512 #MAX_BATCH
|
18 |
+
|
19 |
+
input [
|
20 |
+
{
|
21 |
+
name: "WAV"
|
22 |
+
data_type: TYPE_FP32
|
23 |
+
dims: [-1]
|
24 |
+
},
|
25 |
+
{
|
26 |
+
name: "WAV_LENS"
|
27 |
+
data_type: TYPE_INT32
|
28 |
+
dims: [1]
|
29 |
+
}
|
30 |
+
]
|
31 |
+
|
32 |
+
output [
|
33 |
+
{
|
34 |
+
name: "TRANSCRIPTS"
|
35 |
+
data_type: TYPE_STRING
|
36 |
+
dims: [1]
|
37 |
+
}
|
38 |
+
]
|
39 |
+
|
40 |
+
ensemble_scheduling {
|
41 |
+
step [
|
42 |
+
{
|
43 |
+
model_name: "feature_extractor"
|
44 |
+
model_version: -1
|
45 |
+
input_map {
|
46 |
+
key: "wav"
|
47 |
+
value: "WAV"
|
48 |
+
}
|
49 |
+
input_map {
|
50 |
+
key: "wav_lens"
|
51 |
+
value: "WAV_LENS"
|
52 |
+
}
|
53 |
+
output_map {
|
54 |
+
key: "speech"
|
55 |
+
value: "SPEECH"
|
56 |
+
}
|
57 |
+
output_map {
|
58 |
+
key: "speech_lengths"
|
59 |
+
value: "SPEECH_LENGTHS"
|
60 |
+
}
|
61 |
+
},
|
62 |
+
{
|
63 |
+
model_name: "encoder"
|
64 |
+
model_version: -1
|
65 |
+
input_map {
|
66 |
+
key: "chunk_xs"
|
67 |
+
value: "SPEECH"
|
68 |
+
}
|
69 |
+
input_map {
|
70 |
+
key: "chunk_lens"
|
71 |
+
value: "SPEECH_LENGTHS"
|
72 |
+
}
|
73 |
+
output_map {
|
74 |
+
key: "log_probs"
|
75 |
+
value: "LOG_PROBS"
|
76 |
+
}
|
77 |
+
output_map {
|
78 |
+
key: "log_probs_idx"
|
79 |
+
value: "LOG_PROBS_IDX"
|
80 |
+
}
|
81 |
+
output_map {
|
82 |
+
key: "chunk_out"
|
83 |
+
value: "CHUNK_OUT"
|
84 |
+
}
|
85 |
+
output_map {
|
86 |
+
key: "chunk_out_lens"
|
87 |
+
value: "CHUNK_OUT_LENS"
|
88 |
+
}
|
89 |
+
},
|
90 |
+
{
|
91 |
+
model_name: "wenet"
|
92 |
+
model_version: -1
|
93 |
+
input_map {
|
94 |
+
key: "log_probs"
|
95 |
+
value: "LOG_PROBS"
|
96 |
+
}
|
97 |
+
input_map {
|
98 |
+
key: "log_probs_idx"
|
99 |
+
value: "LOG_PROBS_IDX"
|
100 |
+
}
|
101 |
+
input_map {
|
102 |
+
key: "chunk_out"
|
103 |
+
value: "CHUNK_OUT"
|
104 |
+
}
|
105 |
+
input_map {
|
106 |
+
key: "chunk_out_lens"
|
107 |
+
value: "CHUNK_OUT_LENS"
|
108 |
+
}
|
109 |
+
output_map {
|
110 |
+
key: "OUTPUT0"
|
111 |
+
value: "TRANSCRIPTS"
|
112 |
+
}
|
113 |
+
}
|
114 |
+
]
|
115 |
+
}
|
model_repo_stateful/wenet/1/__pycache__/model.cpython-38.pyc
ADDED
Binary file (4.26 kB). View file
|
|
model_repo_stateful/wenet/1/__pycache__/wenet_onnx_model.cpython-38.pyc
ADDED
Binary file (6.77 kB). View file
|
|
model_repo_stateful/wenet/1/model.py
ADDED
@@ -0,0 +1,180 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
|
15 |
+
import numpy as np
|
16 |
+
import json
|
17 |
+
import torch
|
18 |
+
from swig_decoders import PathTrie, TrieVector
|
19 |
+
|
20 |
+
# triton_python_backend_utils is available in every Triton Python model. You
|
21 |
+
# need to use this module to create inference requests and responses. It also
|
22 |
+
# contains some utility functions for extracting information from model_config
|
23 |
+
# and converting Triton input/output types to numpy types.
|
24 |
+
import triton_python_backend_utils as pb_utils
|
25 |
+
from wenet_onnx_model import WenetModel
|
26 |
+
|
27 |
+
from torch.utils.dlpack import from_dlpack
|
28 |
+
|
29 |
+
class TritonPythonModel:
|
30 |
+
"""Your Python model must use the same class name. Every Python model
|
31 |
+
that is created must have "TritonPythonModel" as the class name.
|
32 |
+
"""
|
33 |
+
|
34 |
+
def initialize(self, args):
|
35 |
+
"""`initialize` is called only once when the model is being loaded.
|
36 |
+
Implementing `initialize` function is optional. This function allows
|
37 |
+
the model to intialize any state associated with this model.
|
38 |
+
Parameters
|
39 |
+
----------
|
40 |
+
args : dict
|
41 |
+
Both keys and values are strings. The dictionary keys and values are:
|
42 |
+
* model_config: A JSON string containing the model configuration
|
43 |
+
* model_instance_kind: A string containing model instance kind
|
44 |
+
* model_instance_device_id: A string containing model instance device ID
|
45 |
+
* model_repository: Model repository path
|
46 |
+
* model_version: Model version
|
47 |
+
* model_name: Model name
|
48 |
+
"""
|
49 |
+
|
50 |
+
# You must parse model_config. JSON string is not parsed here
|
51 |
+
self.model_config = model_config = json.loads(args['model_config'])
|
52 |
+
|
53 |
+
# get device
|
54 |
+
if args["model_instance_kind"] == "GPU":
|
55 |
+
self.device = 'cuda'
|
56 |
+
else:
|
57 |
+
self.device = 'cpu'
|
58 |
+
|
59 |
+
# get parameter configurations
|
60 |
+
self.model = WenetModel(self.model_config, self.device)
|
61 |
+
|
62 |
+
# Get OUTPUT0 configuration
|
63 |
+
output0_config = pb_utils.get_output_config_by_name(
|
64 |
+
model_config, "OUTPUT0")
|
65 |
+
|
66 |
+
# Convert Triton types to numpy types
|
67 |
+
self.output0_dtype = pb_utils.triton_string_to_numpy(
|
68 |
+
output0_config['data_type'])
|
69 |
+
|
70 |
+
# use to record every sequence state
|
71 |
+
self.seq_states = {}
|
72 |
+
print("Finish Init")
|
73 |
+
|
74 |
+
def execute(self, requests):
|
75 |
+
"""
|
76 |
+
requests : list
|
77 |
+
A list of pb_utils.InferenceRequest
|
78 |
+
Returns
|
79 |
+
-------
|
80 |
+
list
|
81 |
+
A list of pb_utils.InferenceResponse. The length of this list must
|
82 |
+
be the same as `requests`
|
83 |
+
"""
|
84 |
+
responses = []
|
85 |
+
batch_log_probs, batch_log_probs_idx, batch_len, batch_states = [], [], [], []
|
86 |
+
cur_encoder_out = []
|
87 |
+
|
88 |
+
batch_encoder_hist = []
|
89 |
+
batch_start = []
|
90 |
+
|
91 |
+
trieVector = TrieVector()
|
92 |
+
|
93 |
+
rescore_index = {}
|
94 |
+
batch_idx2_corrid = {}
|
95 |
+
|
96 |
+
# Every Python backend must iterate over everyone of the requests
|
97 |
+
# and create a pb_utils.InferenceResponse for each of them.
|
98 |
+
batch_idx = 0
|
99 |
+
for request in requests:
|
100 |
+
# Get INPUT0
|
101 |
+
in_0 = pb_utils.get_input_tensor_by_name(request, "log_probs")
|
102 |
+
batch_log_probs.append(in_0.as_numpy()[0])
|
103 |
+
in_1 = pb_utils.get_input_tensor_by_name(request, "log_probs_idx")
|
104 |
+
batch_log_probs_idx.append(in_1.as_numpy()[0])
|
105 |
+
if self.model.rescoring:
|
106 |
+
in_2 = pb_utils.get_input_tensor_by_name(request, "chunk_out")
|
107 |
+
# important to use clone or this tensor
|
108 |
+
# the tensor will be released after one inference
|
109 |
+
in_2 = from_dlpack(in_2.to_dlpack()).clone()
|
110 |
+
cur_encoder_out.append(in_2[0])
|
111 |
+
in_3 = pb_utils.get_input_tensor_by_name(request, "chunk_out_lens")
|
112 |
+
batch_len.append(in_3.as_numpy())
|
113 |
+
|
114 |
+
in_start = pb_utils.get_input_tensor_by_name(request, "START")
|
115 |
+
start = in_start.as_numpy()[0][0]
|
116 |
+
|
117 |
+
if start:
|
118 |
+
batch_start.append(True)
|
119 |
+
else:
|
120 |
+
batch_start.append(False)
|
121 |
+
|
122 |
+
in_ready = pb_utils.get_input_tensor_by_name(request, "READY")
|
123 |
+
ready = in_ready.as_numpy()[0][0]
|
124 |
+
|
125 |
+
in_corrid = pb_utils.get_input_tensor_by_name(request, "CORRID")
|
126 |
+
corrid = in_corrid.as_numpy()[0][0]
|
127 |
+
|
128 |
+
in_end = pb_utils.get_input_tensor_by_name(request, "END")
|
129 |
+
end = in_end.as_numpy()[0][0]
|
130 |
+
|
131 |
+
if start and ready:
|
132 |
+
# intialize states
|
133 |
+
encoder_out = self.model.generate_init_cache()
|
134 |
+
root = PathTrie()
|
135 |
+
# register this sequence
|
136 |
+
self.seq_states[corrid] = [root, encoder_out]
|
137 |
+
|
138 |
+
if end and ready:
|
139 |
+
rescore_index[batch_idx] = 1
|
140 |
+
|
141 |
+
if ready:
|
142 |
+
root, encoder_out = self.seq_states[corrid]
|
143 |
+
trieVector.append(root)
|
144 |
+
batch_idx2_corrid[batch_idx] = corrid
|
145 |
+
batch_encoder_hist.append(encoder_out)
|
146 |
+
|
147 |
+
batch_idx += 1
|
148 |
+
|
149 |
+
batch_states = [trieVector, batch_start, batch_encoder_hist, cur_encoder_out]
|
150 |
+
res_sents, new_states = self.model.infer(batch_log_probs, batch_log_probs_idx,
|
151 |
+
batch_len, rescore_index, batch_states)
|
152 |
+
cur_encoder_out = new_states
|
153 |
+
for i in range(len(res_sents)):
|
154 |
+
sent = np.array(res_sents[i])
|
155 |
+
out_tensor_0 = pb_utils.Tensor("OUTPUT0", sent.astype(self.output0_dtype))
|
156 |
+
response = pb_utils.InferenceResponse(output_tensors=[out_tensor_0])
|
157 |
+
responses.append(response)
|
158 |
+
corr = batch_idx2_corrid[i]
|
159 |
+
if i in rescore_index:
|
160 |
+
# this response ends, remove it
|
161 |
+
del self.seq_states[corr]
|
162 |
+
else:
|
163 |
+
if self.model.rescoring:
|
164 |
+
if self.seq_states[corr][1] is None:
|
165 |
+
self.seq_states[corr][1] = cur_encoder_out[i]
|
166 |
+
else:
|
167 |
+
new_hist = torch.cat([self.seq_states[corr][1],
|
168 |
+
cur_encoder_out[i]], axis=0)
|
169 |
+
self.seq_states[corr][1] = new_hist
|
170 |
+
|
171 |
+
assert len(requests) == len(responses)
|
172 |
+
return responses
|
173 |
+
|
174 |
+
def finalize(self):
|
175 |
+
"""`finalize` is called only once when the model is being unloaded.
|
176 |
+
Implementing `finalize` function is OPTIONAL. This function allows
|
177 |
+
the model to perform any necessary clean ups before exit.
|
178 |
+
"""
|
179 |
+
print('Cleaning up...')
|
180 |
+
del self.model
|
model_repo_stateful/wenet/1/wenet_onnx_model.py
ADDED
@@ -0,0 +1,277 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
|
15 |
+
|
16 |
+
import multiprocessing
|
17 |
+
import numpy as np
|
18 |
+
import os
|
19 |
+
import torch
|
20 |
+
import triton_python_backend_utils as pb_utils
|
21 |
+
from torch.utils.dlpack import to_dlpack, from_dlpack
|
22 |
+
from swig_decoders import ctc_beam_search_decoder_batch, Scorer, map_batch
|
23 |
+
|
24 |
+
class WenetModel(object):
|
25 |
+
def __init__(self, model_config, device):
|
26 |
+
params = self.parse_model_parameters(model_config['parameters'])
|
27 |
+
|
28 |
+
self.device = device
|
29 |
+
print("Using device", device)
|
30 |
+
print("Successfully load model !")
|
31 |
+
|
32 |
+
# load vocabulary
|
33 |
+
ret = self.load_vocab(params["vocab_path"])
|
34 |
+
self.id2vocab, self.vocab, space_id, blank_id, sos_eos = ret
|
35 |
+
self.space_id = space_id if space_id else -1
|
36 |
+
self.blank_id = blank_id if blank_id else 0
|
37 |
+
self.eos = self.sos = sos_eos if sos_eos else len(self.vocab) - 1
|
38 |
+
print("Successfully load vocabulary !")
|
39 |
+
self.params = params
|
40 |
+
|
41 |
+
# beam search setting
|
42 |
+
self.beam_size = params.get("beam_size")
|
43 |
+
self.cutoff_prob = params.get("cutoff_prob")
|
44 |
+
|
45 |
+
# language model
|
46 |
+
lm_path = params.get("lm_path", None)
|
47 |
+
alpha, beta = params.get('alpha'), params.get('beta')
|
48 |
+
self.scorer = None
|
49 |
+
if os.path.exists(lm_path):
|
50 |
+
self.scorer = Scorer(alpha, beta, lm_path, self.vocab)
|
51 |
+
|
52 |
+
self.bidecoder = params.get('bidecoder')
|
53 |
+
# rescore setting
|
54 |
+
self.rescoring = params.get("rescoring", 0)
|
55 |
+
print("Using rescoring:", bool(self.rescoring))
|
56 |
+
print("Successfully load all parameters!")
|
57 |
+
|
58 |
+
log_probs_config = pb_utils.get_input_config_by_name(
|
59 |
+
model_config, "log_probs")
|
60 |
+
# Convert Triton types to numpy types
|
61 |
+
log_probs_dtype = pb_utils.triton_string_to_numpy(
|
62 |
+
log_probs_config['data_type'])
|
63 |
+
|
64 |
+
if log_probs_dtype == np.float32:
|
65 |
+
self.dtype = torch.float32
|
66 |
+
else:
|
67 |
+
self.dtype = torch.float16
|
68 |
+
|
69 |
+
def generate_init_cache(self):
|
70 |
+
encoder_out = None
|
71 |
+
return encoder_out
|
72 |
+
|
73 |
+
def load_vocab(self, vocab_file):
|
74 |
+
"""
|
75 |
+
load lang_char.txt
|
76 |
+
"""
|
77 |
+
id2vocab = {}
|
78 |
+
space_id, blank_id, sos_eos = None, None, None
|
79 |
+
with open(vocab_file, "r", encoding="utf-8") as f:
|
80 |
+
for line in f:
|
81 |
+
line = line.strip()
|
82 |
+
char, id = line.split()
|
83 |
+
id2vocab[int(id)] = char
|
84 |
+
if char == " ":
|
85 |
+
space_id = int(id)
|
86 |
+
elif char == "<blank>":
|
87 |
+
blank_id = int(id)
|
88 |
+
elif char == "<sos/eos>":
|
89 |
+
sos_eos = int(id)
|
90 |
+
vocab = [0] * len(id2vocab)
|
91 |
+
for id, char in id2vocab.items():
|
92 |
+
vocab[id] = char
|
93 |
+
return (id2vocab, vocab, space_id, blank_id, sos_eos)
|
94 |
+
|
95 |
+
def parse_model_parameters(self, model_parameters):
|
96 |
+
model_p = {"beam_size": 10,
|
97 |
+
"cutoff_prob": 0.999,
|
98 |
+
"vocab_path": None,
|
99 |
+
"lm_path": None,
|
100 |
+
"alpha": 2.0,
|
101 |
+
"beta": 1.0,
|
102 |
+
"rescoring": 0,
|
103 |
+
"bidecoder": 1}
|
104 |
+
# get parameter configurations
|
105 |
+
for li in model_parameters.items():
|
106 |
+
key, value = li
|
107 |
+
true_value = value["string_value"]
|
108 |
+
if key not in model_p:
|
109 |
+
continue
|
110 |
+
key_type = type(model_p[key])
|
111 |
+
if key_type == type(None):
|
112 |
+
model_p[key] = true_value
|
113 |
+
else:
|
114 |
+
model_p[key] = key_type(true_value)
|
115 |
+
assert model_p["vocab_path"] is not None
|
116 |
+
return model_p
|
117 |
+
|
118 |
+
def infer(self, batch_log_probs, batch_log_probs_idx,
|
119 |
+
seq_lens, rescore_index, batch_states):
|
120 |
+
"""
|
121 |
+
batch_states = [trieVector, batch_start,
|
122 |
+
batch_encoder_hist, cur_encoder_out]
|
123 |
+
"""
|
124 |
+
trie_vector, batch_start, batch_encoder_hist, cur_encoder_out = batch_states
|
125 |
+
num_processes = min(multiprocessing.cpu_count(), len(batch_log_probs))
|
126 |
+
|
127 |
+
score_hyps = self.batch_ctc_prefix_beam_search_cpu(batch_log_probs,
|
128 |
+
batch_log_probs_idx,
|
129 |
+
seq_lens,
|
130 |
+
trie_vector,
|
131 |
+
batch_start,
|
132 |
+
self.beam_size,
|
133 |
+
self.blank_id,
|
134 |
+
self.space_id,
|
135 |
+
self.cutoff_prob,
|
136 |
+
num_processes,
|
137 |
+
self.scorer)
|
138 |
+
|
139 |
+
if self.rescoring and len(rescore_index) != 0:
|
140 |
+
# find the end of sequence
|
141 |
+
rescore_encoder_hist = []
|
142 |
+
rescore_encoder_lens = []
|
143 |
+
rescore_hyps = []
|
144 |
+
res_idx = list(rescore_index.keys())
|
145 |
+
max_length = -1
|
146 |
+
for idx in res_idx:
|
147 |
+
hist_enc = batch_encoder_hist[idx]
|
148 |
+
if hist_enc is None:
|
149 |
+
cur_enc = cur_encoder_out[idx]
|
150 |
+
else:
|
151 |
+
cur_enc = torch.cat([hist_enc, cur_encoder_out[idx]], axis=0)
|
152 |
+
rescore_encoder_hist.append(cur_enc)
|
153 |
+
cur_mask_len = int(len(hist_enc) + seq_lens[idx])
|
154 |
+
rescore_encoder_lens.append(cur_mask_len)
|
155 |
+
rescore_hyps.append(score_hyps[idx])
|
156 |
+
if cur_enc.shape[0] > max_length:
|
157 |
+
max_length = cur_enc.shape[0]
|
158 |
+
best_index = self.batch_rescoring(rescore_hyps, rescore_encoder_hist,
|
159 |
+
rescore_encoder_lens, max_length)
|
160 |
+
|
161 |
+
best_sent = []
|
162 |
+
j = 0
|
163 |
+
for idx, li in enumerate(score_hyps):
|
164 |
+
if idx in rescore_index and self.rescoring:
|
165 |
+
best_sent.append(li[best_index[j]][1])
|
166 |
+
j += 1
|
167 |
+
else:
|
168 |
+
best_sent.append(li[0][1])
|
169 |
+
|
170 |
+
final_result = map_batch(best_sent, self.vocab, num_processes)
|
171 |
+
|
172 |
+
return final_result, cur_encoder_out
|
173 |
+
|
174 |
+
def batch_ctc_prefix_beam_search_cpu(self, batch_log_probs_seq,
|
175 |
+
batch_log_probs_idx,
|
176 |
+
batch_len, batch_root,
|
177 |
+
batch_start, beam_size,
|
178 |
+
blank_id, space_id,
|
179 |
+
cutoff_prob, num_processes,
|
180 |
+
scorer):
|
181 |
+
"""
|
182 |
+
Return: Batch x Beam_size elements, each element is a tuple
|
183 |
+
(score, list of ids),
|
184 |
+
"""
|
185 |
+
|
186 |
+
batch_len_list = batch_len
|
187 |
+
batch_log_probs_seq_list = []
|
188 |
+
batch_log_probs_idx_list = []
|
189 |
+
for i in range(len(batch_len_list)):
|
190 |
+
cur_len = int(batch_len_list[i])
|
191 |
+
batch_log_probs_seq_list.append(batch_log_probs_seq[i][0:cur_len].tolist())
|
192 |
+
batch_log_probs_idx_list.append(batch_log_probs_idx[i][0:cur_len].tolist())
|
193 |
+
score_hyps = ctc_beam_search_decoder_batch(batch_log_probs_seq_list,
|
194 |
+
batch_log_probs_idx_list,
|
195 |
+
batch_root,
|
196 |
+
batch_start,
|
197 |
+
beam_size,
|
198 |
+
num_processes,
|
199 |
+
blank_id,
|
200 |
+
space_id,
|
201 |
+
cutoff_prob,
|
202 |
+
scorer)
|
203 |
+
return score_hyps
|
204 |
+
|
205 |
+
def batch_rescoring(self, score_hyps, hist_enc, hist_mask_len, max_len):
|
206 |
+
"""
|
207 |
+
score_hyps: [((ctc_score, (id1, id2, id3, ....)), (), ...), ....]
|
208 |
+
hist_enc: [len1xF, len2xF, .....]
|
209 |
+
hist_mask: [1x1xlen1, 1x1xlen2]
|
210 |
+
return bzx1 best_index
|
211 |
+
"""
|
212 |
+
bz = len(hist_enc)
|
213 |
+
f = hist_enc[0].shape[-1]
|
214 |
+
beam_size = self.beam_size
|
215 |
+
encoder_lens = np.zeros((bz, 1), dtype=np.int32)
|
216 |
+
encoder_out = torch.zeros((bz, max_len, f), dtype=self.dtype)
|
217 |
+
hyps = []
|
218 |
+
ctc_score = torch.zeros((bz, beam_size), dtype=self.dtype)
|
219 |
+
max_seq_len = 0
|
220 |
+
for i in range(bz):
|
221 |
+
cur_len = hist_enc[i].shape[0]
|
222 |
+
encoder_out[i, 0:cur_len] = hist_enc[i]
|
223 |
+
encoder_lens[i, 0] = hist_mask_len[i]
|
224 |
+
|
225 |
+
# process candidate
|
226 |
+
if len(score_hyps[i]) < beam_size:
|
227 |
+
to_append = (beam_size - len(score_hyps[i])) * [(-10000, ())]
|
228 |
+
score_hyps[i] = list(score_hyps[i]) + to_append
|
229 |
+
for idx, c in enumerate(score_hyps[i]):
|
230 |
+
score, idlist = c
|
231 |
+
if score < -10000:
|
232 |
+
score = -10000
|
233 |
+
ctc_score[i][idx] = score
|
234 |
+
hyps.append(list(idlist))
|
235 |
+
if len(hyps[-1]) > max_seq_len:
|
236 |
+
max_seq_len = len(hyps[-1])
|
237 |
+
|
238 |
+
max_seq_len += 2
|
239 |
+
hyps_pad_sos_eos = np.ones((bz, beam_size, max_seq_len), dtype=np.int64)
|
240 |
+
hyps_pad_sos_eos = hyps_pad_sos_eos * self.eos # fill eos
|
241 |
+
if self.bidecoder:
|
242 |
+
r_hyps_pad_sos_eos = np.ones((bz, beam_size, max_seq_len), dtype=np.int64)
|
243 |
+
r_hyps_pad_sos_eos = r_hyps_pad_sos_eos * self.eos
|
244 |
+
|
245 |
+
hyps_lens_sos = np.ones((bz, beam_size), dtype=np.int32)
|
246 |
+
bz_id = 0
|
247 |
+
for idx, cand in enumerate(hyps):
|
248 |
+
bz_id = idx // beam_size
|
249 |
+
length = len(cand) + 2
|
250 |
+
bz_offset = idx % beam_size
|
251 |
+
pad_cand = [self.sos] + cand + [self.eos]
|
252 |
+
hyps_pad_sos_eos[bz_id][bz_offset][0 : length] = pad_cand
|
253 |
+
if self.bidecoder:
|
254 |
+
r_pad_cand = [self.sos] + cand[::-1] + [self.eos]
|
255 |
+
r_hyps_pad_sos_eos[bz_id][bz_offset][0:length] = r_pad_cand
|
256 |
+
hyps_lens_sos[bz_id][idx % beam_size] = len(cand) + 1
|
257 |
+
in0 = pb_utils.Tensor.from_dlpack("encoder_out", to_dlpack(encoder_out))
|
258 |
+
in1 = pb_utils.Tensor("encoder_out_lens", encoder_lens)
|
259 |
+
in2 = pb_utils.Tensor("hyps_pad_sos_eos", hyps_pad_sos_eos)
|
260 |
+
in3 = pb_utils.Tensor("hyps_lens_sos", hyps_lens_sos)
|
261 |
+
input_tensors = [in0, in1, in2, in3]
|
262 |
+
if self.bidecoder:
|
263 |
+
in4 = pb_utils.Tensor("r_hyps_pad_sos_eos", r_hyps_pad_sos_eos)
|
264 |
+
input_tensors.append(in4)
|
265 |
+
in5 = pb_utils.Tensor.from_dlpack("ctc_score", to_dlpack(ctc_score))
|
266 |
+
input_tensors.append(in5)
|
267 |
+
request = pb_utils.InferenceRequest(model_name='decoder',
|
268 |
+
requested_output_names=['best_index'],
|
269 |
+
inputs=input_tensors)
|
270 |
+
response = request.exec()
|
271 |
+
best_index = pb_utils.get_output_tensor_by_name(response, 'best_index')
|
272 |
+
best_index = from_dlpack(best_index.to_dlpack()).clone()
|
273 |
+
best_index = best_index.numpy()[:, 0]
|
274 |
+
return best_index
|
275 |
+
|
276 |
+
def __del__(self):
|
277 |
+
print("remove wenet model")
|
model_repo_stateful/wenet/config.pbtxt
ADDED
@@ -0,0 +1,126 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
name: "wenet"
|
3 |
+
backend: "python"
|
4 |
+
max_batch_size: 512
|
5 |
+
|
6 |
+
sequence_batching{
|
7 |
+
max_sequence_idle_microseconds: 5000000
|
8 |
+
oldest {
|
9 |
+
max_candidate_sequences: 1024
|
10 |
+
preferred_batch_size: [32, 64, 128, 256]
|
11 |
+
}
|
12 |
+
control_input [
|
13 |
+
{
|
14 |
+
name: "START",
|
15 |
+
control [
|
16 |
+
{
|
17 |
+
kind: CONTROL_SEQUENCE_START
|
18 |
+
fp32_false_true: [0, 1]
|
19 |
+
}
|
20 |
+
]
|
21 |
+
},
|
22 |
+
{
|
23 |
+
name: "READY"
|
24 |
+
control [
|
25 |
+
{
|
26 |
+
kind: CONTROL_SEQUENCE_READY
|
27 |
+
fp32_false_true: [0, 1]
|
28 |
+
}
|
29 |
+
]
|
30 |
+
},
|
31 |
+
{
|
32 |
+
name: "CORRID",
|
33 |
+
control [
|
34 |
+
{
|
35 |
+
kind: CONTROL_SEQUENCE_CORRID
|
36 |
+
data_type: TYPE_UINT64
|
37 |
+
}
|
38 |
+
]
|
39 |
+
},
|
40 |
+
{
|
41 |
+
name: "END",
|
42 |
+
control [
|
43 |
+
{
|
44 |
+
kind: CONTROL_SEQUENCE_END
|
45 |
+
fp32_false_true: [0, 1]
|
46 |
+
}
|
47 |
+
]
|
48 |
+
}
|
49 |
+
]
|
50 |
+
}
|
51 |
+
|
52 |
+
parameters [
|
53 |
+
{
|
54 |
+
key: "beam_size",
|
55 |
+
value: { string_value: "10" }
|
56 |
+
},
|
57 |
+
{
|
58 |
+
key: "cutoff_prob",
|
59 |
+
value: { string_value: "0.9999" }
|
60 |
+
},
|
61 |
+
{
|
62 |
+
key: "alpha",
|
63 |
+
value: { string_value: "2" }
|
64 |
+
},
|
65 |
+
{
|
66 |
+
key: "beta",
|
67 |
+
value: { string_value: "1" }
|
68 |
+
},
|
69 |
+
{
|
70 |
+
key: "vocab_path",
|
71 |
+
value: { string_value: "/ws/onnx_model/units.txt"}
|
72 |
+
},
|
73 |
+
{
|
74 |
+
key: "lm_path",
|
75 |
+
value: { string_value: "/ws/onnx_model/lm.bin"}
|
76 |
+
},
|
77 |
+
{
|
78 |
+
key: "bidecoder",
|
79 |
+
value: { string_value: "0"}
|
80 |
+
},
|
81 |
+
{
|
82 |
+
key: "rescoring",
|
83 |
+
value: { string_value: "1" }
|
84 |
+
},
|
85 |
+
{
|
86 |
+
key: "FORCE_CPU_ONLY_INPUT_TENSORS",
|
87 |
+
value: {string_value:"yes"}
|
88 |
+
}
|
89 |
+
]
|
90 |
+
|
91 |
+
input [
|
92 |
+
{
|
93 |
+
name: "log_probs"
|
94 |
+
data_type: TYPE_FP16
|
95 |
+
dims: [-1, 10] # [-1, beam_size]
|
96 |
+
},
|
97 |
+
{
|
98 |
+
name: "log_probs_idx"
|
99 |
+
data_type: TYPE_INT64
|
100 |
+
dims: [-1, 10] # [-1, beam_size]
|
101 |
+
},
|
102 |
+
{
|
103 |
+
name: "chunk_out"
|
104 |
+
data_type: TYPE_FP16
|
105 |
+
dims: [-1, -1]
|
106 |
+
},
|
107 |
+
{
|
108 |
+
name: "chunk_out_lens"
|
109 |
+
data_type: TYPE_INT32
|
110 |
+
dims: [1]
|
111 |
+
}
|
112 |
+
]
|
113 |
+
output [
|
114 |
+
{
|
115 |
+
name: "OUTPUT0"
|
116 |
+
data_type: TYPE_STRING
|
117 |
+
dims: [1]
|
118 |
+
reshape { shape: [] }
|
119 |
+
}
|
120 |
+
]
|
121 |
+
instance_group [
|
122 |
+
{
|
123 |
+
count: 2
|
124 |
+
kind: KIND_CPU
|
125 |
+
}
|
126 |
+
]
|
model_repo_stateful/wenet/config_template.pbtxt
ADDED
@@ -0,0 +1,139 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
|
15 |
+
name: "wenet"
|
16 |
+
backend: "python"
|
17 |
+
max_batch_size: 512
|
18 |
+
|
19 |
+
sequence_batching{
|
20 |
+
max_sequence_idle_microseconds: 5000000
|
21 |
+
oldest {
|
22 |
+
max_candidate_sequences: 1024
|
23 |
+
preferred_batch_size: [32, 64, 128, 256]
|
24 |
+
}
|
25 |
+
control_input [
|
26 |
+
{
|
27 |
+
name: "START",
|
28 |
+
control [
|
29 |
+
{
|
30 |
+
kind: CONTROL_SEQUENCE_START
|
31 |
+
fp32_false_true: [0, 1]
|
32 |
+
}
|
33 |
+
]
|
34 |
+
},
|
35 |
+
{
|
36 |
+
name: "READY"
|
37 |
+
control [
|
38 |
+
{
|
39 |
+
kind: CONTROL_SEQUENCE_READY
|
40 |
+
fp32_false_true: [0, 1]
|
41 |
+
}
|
42 |
+
]
|
43 |
+
},
|
44 |
+
{
|
45 |
+
name: "CORRID",
|
46 |
+
control [
|
47 |
+
{
|
48 |
+
kind: CONTROL_SEQUENCE_CORRID
|
49 |
+
data_type: TYPE_UINT64
|
50 |
+
}
|
51 |
+
]
|
52 |
+
},
|
53 |
+
{
|
54 |
+
name: "END",
|
55 |
+
control [
|
56 |
+
{
|
57 |
+
kind: CONTROL_SEQUENCE_END
|
58 |
+
fp32_false_true: [0, 1]
|
59 |
+
}
|
60 |
+
]
|
61 |
+
}
|
62 |
+
]
|
63 |
+
}
|
64 |
+
|
65 |
+
parameters [
|
66 |
+
{
|
67 |
+
key: "beam_size",
|
68 |
+
value: { string_value: "#beam_size" }
|
69 |
+
},
|
70 |
+
{
|
71 |
+
key: "cutoff_prob",
|
72 |
+
value: { string_value: "0.9999" }
|
73 |
+
},
|
74 |
+
{
|
75 |
+
key: "alpha",
|
76 |
+
value: { string_value: "2" }
|
77 |
+
},
|
78 |
+
{
|
79 |
+
key: "beta",
|
80 |
+
value: { string_value: "1" }
|
81 |
+
},
|
82 |
+
{
|
83 |
+
key: "vocab_path",
|
84 |
+
value: { string_value: "/ws/onnx_model/units.txt"}
|
85 |
+
},
|
86 |
+
{
|
87 |
+
key: "lm_path",
|
88 |
+
value: { string_value: "/ws/onnx_model/lm.bin"}
|
89 |
+
},
|
90 |
+
{
|
91 |
+
key: "bidecoder",
|
92 |
+
value: { string_value: "#bidecoder"}
|
93 |
+
},
|
94 |
+
{
|
95 |
+
key: "rescoring",
|
96 |
+
value: { string_value: "1" }
|
97 |
+
},
|
98 |
+
{
|
99 |
+
key: "FORCE_CPU_ONLY_INPUT_TENSORS",
|
100 |
+
value: {string_value:"yes"}
|
101 |
+
}
|
102 |
+
]
|
103 |
+
|
104 |
+
input [
|
105 |
+
{
|
106 |
+
name: "log_probs"
|
107 |
+
data_type: TYPE_#DTYPE
|
108 |
+
dims: [-1, #beam_size] # [-1, beam_size]
|
109 |
+
},
|
110 |
+
{
|
111 |
+
name: "log_probs_idx"
|
112 |
+
data_type: TYPE_INT64
|
113 |
+
dims: [-1, #beam_size] # [-1, beam_size]
|
114 |
+
},
|
115 |
+
{
|
116 |
+
name: "chunk_out"
|
117 |
+
data_type: TYPE_#DTYPE
|
118 |
+
dims: [-1, -1]
|
119 |
+
},
|
120 |
+
{
|
121 |
+
name: "chunk_out_lens"
|
122 |
+
data_type: TYPE_INT32
|
123 |
+
dims: [1]
|
124 |
+
}
|
125 |
+
]
|
126 |
+
output [
|
127 |
+
{
|
128 |
+
name: "OUTPUT0"
|
129 |
+
data_type: TYPE_STRING
|
130 |
+
dims: [1]
|
131 |
+
reshape { shape: [] }
|
132 |
+
}
|
133 |
+
]
|
134 |
+
instance_group [
|
135 |
+
{
|
136 |
+
count: 2
|
137 |
+
kind: KIND_CPU
|
138 |
+
}
|
139 |
+
]
|