jayke committed · Commit cf64894 · 1 Parent(s): ea52087
model_repo_stateful/decoder/1/.gitignore ADDED
File without changes
model_repo_stateful/decoder/1/decoder.onnx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2272437072ed614f41591e42633e4cc3a1c63729ff490a1c3c89a789a05eb70a
+ size 56292294
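Note: this entry (and `encoder.onnx` below) is a Git LFS pointer file, not the ONNX weights themselves; the `oid` and `size` fields identify the actual blob, which a plain checkout must fetch separately (e.g. via `git lfs pull`). A minimal sketch of reading such a pointer, assuming a local checkout path:

# Minimal sketch: inspect a Git LFS pointer file (path assumes a local checkout).
def read_lfs_pointer(pointer_path: str) -> dict:
    fields = {}
    with open(pointer_path, "r", encoding="utf-8") as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value  # keys: "version", "oid", "size"
    return fields

info = read_lfs_pointer("model_repo_stateful/decoder/1/decoder.onnx")
print(info["oid"], info["size"])  # sha256:2272... 56292294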
model_repo_stateful/decoder/config.pbtxt ADDED
@@ -0,0 +1,55 @@
+
+ name: "decoder"
+ backend: "onnxruntime"
+ default_model_filename: "decoder.onnx"
+
+ max_batch_size: 640
+ input [
+   {
+     name: "encoder_out"
+     data_type: TYPE_FP16
+     dims: [-1, 512] # [-1, output_size]
+   },
+   {
+     name: "encoder_out_lens"
+     data_type: TYPE_INT32
+     dims: [1]
+     reshape: { shape: [ ] }
+   },
+   {
+     name: "hyps_pad_sos_eos"
+     data_type: TYPE_INT64
+     dims: [10, -1]
+   },
+   {
+     name: "hyps_lens_sos"
+     data_type: TYPE_INT32
+     dims: [10]
+   },
+   {
+     name: "ctc_score"
+     data_type: TYPE_FP16
+     dims: [10]
+   }
+ ]
+
+ output [
+   {
+     name: "best_index"
+     data_type: TYPE_INT64
+     dims: [1]
+     reshape: { shape: [ ] }
+   }
+ ]
+
+ dynamic_batching {
+   preferred_batch_size: [ 16, 32 ]
+ }
+
+ instance_group [
+   {
+     count: 2
+     kind: KIND_GPU
+   }
+ ]
+
model_repo_stateful/decoder/config_template.pbtxt ADDED
@@ -0,0 +1,73 @@
+ # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ name: "decoder"
+ backend: "onnxruntime"
+ default_model_filename: "decoder.onnx"
+
+ max_batch_size: 640
+ input [
+   {
+     name: "encoder_out"
+     data_type: TYPE_#DTYPE
+     dims: [-1, #output_size]
+   },
+   {
+     name: "encoder_out_lens"
+     data_type: TYPE_INT32
+     dims: [1]
+     reshape: { shape: [ ] }
+   },
+   {
+     name: "hyps_pad_sos_eos"
+     data_type: TYPE_INT64
+     dims: [#beam_size, -1]
+   },
+   {
+     name: "hyps_lens_sos"
+     data_type: TYPE_INT32
+     dims: [#beam_size]
+   },
+   {
+     name: "r_hyps_pad_sos_eos"
+     data_type: TYPE_INT64
+     dims: [#beam_size, -1]
+   },
+   {
+     name: "ctc_score"
+     data_type: TYPE_#DTYPE
+     dims: [#beam_size]
+   }
+ ]
+
+ output [
+   {
+     name: "best_index"
+     data_type: TYPE_INT64
+     dims: [1]
+     reshape: { shape: [ ] }
+   }
+ ]
+
+ dynamic_batching {
+   preferred_batch_size: [ 16, 32 ]
+ }
+
+ instance_group [
+   {
+     count: 2
+     kind: KIND_GPU
+   }
+ ]
+
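The `#`-prefixed tokens in this template (`#DTYPE`, `#output_size`, `#beam_size`) are placeholders that a deployment script substitutes with model-specific values to produce a concrete `config.pbtxt` like the one above (FP16, 512, and 10 respectively; `config_template2.pbtxt` below is the variant without the bidirectional-decoder input `r_hyps_pad_sos_eos`). A minimal sketch of that substitution, assuming plain string replacement and illustrative values:

# Minimal sketch: fill the '#'-placeholders of a config template (values are
# illustrative; the real export pipeline may derive them from the checkpoint).
values = {
    "#DTYPE": "FP16",       # yields data_type: TYPE_FP16
    "#output_size": "512",  # encoder output feature size
    "#beam_size": "10",     # CTC prefix beam width
}
with open("config_template.pbtxt") as f:
    config = f.read()
for placeholder, value in values.items():
    config = config.replace(placeholder, value)
with open("config.pbtxt", "w") as f:
    f.write(config)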
model_repo_stateful/decoder/config_template2.pbtxt ADDED
@@ -0,0 +1,68 @@
+ # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ name: "decoder"
+ backend: "onnxruntime"
+ default_model_filename: "decoder.onnx"
+
+ max_batch_size: 640
+ input [
+   {
+     name: "encoder_out"
+     data_type: TYPE_#DTYPE
+     dims: [-1, #output_size]
+   },
+   {
+     name: "encoder_out_lens"
+     data_type: TYPE_INT32
+     dims: [1]
+     reshape: { shape: [ ] }
+   },
+   {
+     name: "hyps_pad_sos_eos"
+     data_type: TYPE_INT64
+     dims: [#beam_size, -1]
+   },
+   {
+     name: "hyps_lens_sos"
+     data_type: TYPE_INT32
+     dims: [#beam_size]
+   },
+   {
+     name: "ctc_score"
+     data_type: TYPE_#DTYPE
+     dims: [#beam_size]
+   }
+ ]
+
+ output [
+   {
+     name: "best_index"
+     data_type: TYPE_INT64
+     dims: [1]
+     reshape: { shape: [ ] }
+   }
+ ]
+
+ dynamic_batching {
+   preferred_batch_size: [ 16, 32 ]
+ }
+
+ instance_group [
+   {
+     count: 2
+     kind: KIND_GPU
+   }
+ ]
+
model_repo_stateful/encoder/1/.gitignore ADDED
File without changes
model_repo_stateful/encoder/1/encoder.onnx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a9923799f94d885a0e0c798b0d63322565f48636bd67f7b95af7f8b7e4e4b0de
+ size 171905418
model_repo_stateful/encoder/config.pbtxt ADDED
@@ -0,0 +1,109 @@
+
+ name: "encoder"
+ backend: "onnxruntime"
+ default_model_filename: "encoder.onnx"
+
+ max_batch_size: 512
+
+ sequence_batching {
+   max_sequence_idle_microseconds: 5000000
+   oldest {
+     max_candidate_sequences: 1024
+     preferred_batch_size: [32, 64, 128, 256]
+     max_queue_delay_microseconds: 5000
+   }
+   control_input [
+   ]
+   state [
+     {
+       input_name: "offset"
+       output_name: "r_offset"
+       data_type: TYPE_INT64
+       dims: [ 1 ]
+       initial_state: {
+         data_type: TYPE_INT64
+         dims: [ 1 ]
+         zero_data: true
+         name: "initial state"
+       }
+     },
+     {
+       input_name: "att_cache"
+       output_name: "r_att_cache"
+       data_type: TYPE_FP16
+       dims: [ 12, 8, 80, 128 ]
+       initial_state: {
+         data_type: TYPE_FP16
+         dims: [ 12, 8, 80, 128 ]
+         zero_data: true
+         name: "initial state"
+       }
+     },
+     {
+       input_name: "cnn_cache"
+       output_name: "r_cnn_cache"
+       data_type: TYPE_FP16
+       dims: [12, 512, 14]
+       initial_state: {
+         data_type: TYPE_FP16
+         dims: [12, 512, 14]
+         zero_data: true
+         name: "initial state"
+       }
+     },
+     {
+       input_name: "cache_mask"
+       output_name: "r_cache_mask"
+       data_type: TYPE_FP16
+       dims: [1, 80]
+       initial_state: {
+         data_type: TYPE_FP16
+         dims: [1, 80]
+         zero_data: true
+         name: "initial state"
+       }
+     }
+   ]
+ }
+ input [
+   {
+     name: "chunk_xs"
+     data_type: TYPE_FP16
+     dims: [67, 80]
+   },
+   {
+     name: "chunk_lens"
+     data_type: TYPE_INT32
+     dims: [ 1 ]
+     reshape: { shape: [] }
+   }
+ ]
+ output [
+   {
+     name: "log_probs"
+     data_type: TYPE_FP16
+     dims: [-1, 10] # [-1, beam_size]
+   },
+   {
+     name: "log_probs_idx"
+     data_type: TYPE_INT64
+     dims: [-1, 10] # [-1, beam_size]
+   },
+   {
+     name: "chunk_out"
+     data_type: TYPE_FP16
+     dims: [-1, -1]
+   },
+   {
+     name: "chunk_out_lens"
+     data_type: TYPE_INT32
+     dims: [1]
+     reshape: { shape: [] }
+   }
+ ]
+ instance_group [
+   {
+     count: 2
+     kind: KIND_GPU
+   }
+ ]
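Read against the template below, the concrete state shapes decode as att_cache = [num_layers=12, num_head=8, cache_size=80, att_cache_output_size=128], cnn_cache = [num_layers=12, output_size=512, cnn_module_cache=14], and cache_mask = [1, cache_size=80]; Triton zero-fills each state when a sequence starts (`zero_data: true`) and feeds every `r_*` output back as the next request's input. As a rough sketch of what that implies per active sequence (FP16 = 2 bytes; the small int64 offset is ignored):

# Rough sketch: per-sequence state memory implied by this config (FP16 states).
from math import prod

state_dims = {
    "att_cache": (12, 8, 80, 128),  # [num_layers, num_head, cache_size, att_cache_output_size]
    "cnn_cache": (12, 512, 14),     # [num_layers, output_size, cnn_module_cache]
    "cache_mask": (1, 80),          # [1, cache_size]
}
total_bytes = sum(prod(dims) * 2 for dims in state_dims.values())
print(f"{total_bytes / 2**20:.2f} MiB per active sequence")  # ~2.04 MiB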
model_repo_stateful/encoder/config_template.pbtxt ADDED
@@ -0,0 +1,122 @@
+ # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ name: "encoder"
+ backend: "onnxruntime"
+ default_model_filename: "encoder.onnx"
+
+ max_batch_size: 512
+
+ sequence_batching {
+   max_sequence_idle_microseconds: 5000000
+   oldest {
+     max_candidate_sequences: 1024
+     preferred_batch_size: [32, 64, 128, 256]
+     max_queue_delay_microseconds: 5000
+   }
+   control_input [
+   ]
+   state [
+     {
+       input_name: "offset"
+       output_name: "r_offset"
+       data_type: TYPE_INT64
+       dims: [ 1 ]
+       initial_state: {
+         data_type: TYPE_INT64
+         dims: [ 1 ]
+         zero_data: true
+         name: "initial state"
+       }
+     },
+     {
+       input_name: "att_cache"
+       output_name: "r_att_cache"
+       data_type: TYPE_#DTYPE
+       dims: [ #num_layers, #num_head, #cache_size, #att_cache_output_size ]
+       initial_state: {
+         data_type: TYPE_#DTYPE
+         dims: [ #num_layers, #num_head, #cache_size, #att_cache_output_size ]
+         zero_data: true
+         name: "initial state"
+       }
+     },
+     {
+       input_name: "cnn_cache"
+       output_name: "r_cnn_cache"
+       data_type: TYPE_#DTYPE
+       dims: [#num_layers, #output_size, #cnn_module_cache]
+       initial_state: {
+         data_type: TYPE_#DTYPE
+         dims: [#num_layers, #output_size, #cnn_module_cache]
+         zero_data: true
+         name: "initial state"
+       }
+     },
+     {
+       input_name: "cache_mask"
+       output_name: "r_cache_mask"
+       data_type: TYPE_#DTYPE
+       dims: [1, #cache_size]
+       initial_state: {
+         data_type: TYPE_#DTYPE
+         dims: [1, #cache_size]
+         zero_data: true
+         name: "initial state"
+       }
+     }
+   ]
+ }
+ input [
+   {
+     name: "chunk_xs"
+     data_type: TYPE_#DTYPE
+     dims: [#decoding_window, #num_mel_bins]
+   },
+   {
+     name: "chunk_lens"
+     data_type: TYPE_INT32
+     dims: [ 1 ]
+     reshape: { shape: [] }
+   }
+ ]
+ output [
+   {
+     name: "log_probs"
+     data_type: TYPE_#DTYPE
+     dims: [-1, #beam_size]
+   },
+   {
+     name: "log_probs_idx"
+     data_type: TYPE_INT64
+     dims: [-1, #beam_size]
+   },
+   {
+     name: "chunk_out"
+     data_type: TYPE_#DTYPE
+     dims: [-1, #encoder_output_size]
+   },
+   {
+     name: "chunk_out_lens"
+     data_type: TYPE_INT32
+     dims: [1]
+     reshape: { shape: [] }
+   }
+ ]
+ instance_group [
+   {
+     count: 2
+     kind: KIND_GPU
+   }
+ ]
model_repo_stateful/encoder/config_template2.pbtxt ADDED
@@ -0,0 +1,110 @@
+ # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ name: "encoder"
+ backend: "onnxruntime"
+ default_model_filename: "encoder.onnx"
+
+ max_batch_size: 512
+
+ sequence_batching {
+   max_sequence_idle_microseconds: 5000000
+   oldest {
+     max_candidate_sequences: 1024
+     preferred_batch_size: [32, 64, 128, 256]
+     max_queue_delay_microseconds: 5000
+   }
+   control_input [
+   ]
+   state [
+     {
+       input_name: "offset"
+       output_name: "r_offset"
+       data_type: TYPE_INT64
+       dims: [ 1 ]
+       initial_state: {
+         data_type: TYPE_INT64
+         dims: [ 1 ]
+         zero_data: true
+         name: "initial state"
+       }
+     },
+     {
+       input_name: "att_cache"
+       output_name: "r_att_cache"
+       data_type: TYPE_#DTYPE
+       dims: [ #num_layers, #num_head, #cache_size, #att_cache_output_size ]
+       initial_state: {
+         data_type: TYPE_#DTYPE
+         dims: [ #num_layers, #num_head, #cache_size, #att_cache_output_size ]
+         zero_data: true
+         name: "initial state"
+       }
+     },
+     {
+       input_name: "cache_mask"
+       output_name: "r_cache_mask"
+       data_type: TYPE_#DTYPE
+       dims: [1, #cache_size]
+       initial_state: {
+         data_type: TYPE_#DTYPE
+         dims: [1, #cache_size]
+         zero_data: true
+         name: "initial state"
+       }
+     }
+   ]
+ }
+ input [
+   {
+     name: "chunk_xs"
+     data_type: TYPE_#DTYPE
+     dims: [#decoding_window, #num_mel_bins]
+   },
+   {
+     name: "chunk_lens"
+     data_type: TYPE_INT32
+     dims: [ 1 ]
+     reshape: { shape: [] }
+   }
+ ]
+ output [
+   {
+     name: "log_probs"
+     data_type: TYPE_#DTYPE
+     dims: [-1, #beam_size]
+   },
+   {
+     name: "log_probs_idx"
+     data_type: TYPE_INT64
+     dims: [-1, #beam_size]
+   },
+   {
+     name: "chunk_out"
+     data_type: TYPE_#DTYPE
+     dims: [-1, #encoder_output_size]
+   },
+   {
+     name: "chunk_out_lens"
+     data_type: TYPE_INT32
+     dims: [1]
+     reshape: { shape: [] }
+   }
+ ]
+ instance_group [
+   {
+     count: 2
+     kind: KIND_GPU
+   }
+ ]
model_repo_stateful/feature_extractor/1/__pycache__/model.cpython-38.pyc ADDED
Binary file (7.7 kB)
model_repo_stateful/feature_extractor/1/model.py ADDED
@@ -0,0 +1,277 @@
+ # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import triton_python_backend_utils as pb_utils
+ from torch.utils.dlpack import from_dlpack
+ import torch
+ import kaldifeat
+ import _kaldifeat
+ from typing import List
+ import json
+ import numpy as np
+
+
+ class Fbank(torch.nn.Module):
+     def __init__(self, opts):
+         super(Fbank, self).__init__()
+         self.fbank = kaldifeat.Fbank(opts)
+
+     def forward(self, waves: List[torch.Tensor]):
+         return self.fbank(waves)
+
+
+ class Feat(object):
+     def __init__(
+         self, seqid, offset_ms, sample_rate, first_chunk_sz, frame_stride, device="cpu"
+     ):
+         self.seqid = seqid
+         self.sample_rate = sample_rate
+         self.wav = torch.tensor([], device=device)
+         self.offset = int(offset_ms / 1000 * sample_rate)
+         self.frames = None
+         self.frame_stride = int(frame_stride)
+         self.first_chunk_sz = first_chunk_sz
+         self.device = device
+
+     def add_wavs(self, wav: torch.Tensor):
+         if len(self.wav) == 0 and len(wav) < self.first_chunk_sz:
+             raise Exception("Invalid first chunk size", len(wav))
+         wav = wav.to(self.device)
+         self.wav = torch.cat([self.wav, wav], axis=0)
+
+     def get_seg_wav(self):
+         seg = self.wav[:]
+         self.wav = self.wav[-self.offset:]
+         return seg
+
+     def add_frames(self, frames: torch.Tensor):
+         """
+         frames: seq_len x feat_sz
+         """
+         if self.frames is None:
+             self.frames = frames
+         else:
+             self.frames = torch.cat([self.frames, frames], axis=0)
+
+     def get_frames(self, num_frames: int):
+         seg = self.frames[0:num_frames]
+         self.frames = self.frames[self.frame_stride:]
+         return seg
+
+
+ class TritonPythonModel:
+     """Your Python model must use the same class name. Every Python model
+     that is created must have "TritonPythonModel" as the class name.
+     """
+
+     def initialize(self, args):
+         """`initialize` is called only once when the model is being loaded.
+         Implementing `initialize` function is optional. This function allows
+         the model to initialize any state associated with this model.
+         Parameters
+         ----------
+         args : dict
+           Both keys and values are strings. The dictionary keys and values are:
+           * model_config: A JSON string containing the model configuration
+           * model_instance_kind: A string containing model instance kind
+           * model_instance_device_id: A string containing model instance device ID
+           * model_repository: Model repository path
+           * model_version: Model version
+           * model_name: Model name
+         """
+         self.model_config = model_config = json.loads(args["model_config"])
+         self.max_batch_size = max(model_config["max_batch_size"], 1)
+
+         if "GPU" in model_config["instance_group"][0]["kind"]:
+             self.device = "cuda"
+         else:
+             self.device = "cpu"
+
+         # Get OUTPUT0 configuration
+         output0_config = pb_utils.get_output_config_by_name(model_config, "speech")
+         # Convert Triton types to numpy types
+         self.output0_dtype = pb_utils.triton_string_to_numpy(
+             output0_config["data_type"]
+         )
+
+         if self.output0_dtype == np.float32:
+             self.dtype = torch.float32
+         else:
+             self.dtype = torch.float16
+
+         self.feature_size = output0_config["dims"][-1]
+         self.decoding_window = output0_config["dims"][-2]
+         # Get OUTPUT1 configuration
+         output1_config = pb_utils.get_output_config_by_name(
+             model_config, "speech_lengths"
+         )
+         # Convert Triton types to numpy types
+         self.output1_dtype = pb_utils.triton_string_to_numpy(
+             output1_config["data_type"]
+         )
+
+         feat_opt = self.parse_model_params(model_config["parameters"])
+
+         opts = kaldifeat.FbankOptions()
+         opts.frame_opts.dither = 0
+         opts.mel_opts.num_bins = self.feature_size
+         frame_length_ms = feat_opt["frame_length_ms"]
+         frame_shift_ms = feat_opt["frame_shift_ms"]
+         opts.frame_opts.frame_length_ms = frame_length_ms
+         opts.frame_opts.frame_shift_ms = frame_shift_ms
+         opts.frame_opts.samp_freq = feat_opt["sample_rate"]
+         opts.device = torch.device(self.device)
+         self.opts = opts
+         self.feature_extractor = Fbank(self.opts)
+         self.seq_feat = {}
+         chunk_size_s = feat_opt["chunk_size_s"]
+         sample_rate = feat_opt["sample_rate"]
+         self.chunk_size = int(chunk_size_s * sample_rate)
+         self.frame_stride = (chunk_size_s * 1000) // frame_shift_ms
+
+         # grow the first chunk until it covers one full decoding window
+         first_chunk_size = int(self.chunk_size)
+         cur_frames = _kaldifeat.num_frames(first_chunk_size, opts.frame_opts)
+         while cur_frames < self.decoding_window:
+             first_chunk_size += frame_shift_ms * sample_rate // 1000
+             cur_frames = _kaldifeat.num_frames(first_chunk_size, opts.frame_opts)
+         # self.pad_silence = first_chunk_size - self.chunk_size
+         self.first_chunk_size = first_chunk_size
+         self.offset_ms = self.get_offset(frame_length_ms, frame_shift_ms)
+         self.sample_rate = sample_rate
+         self.min_seg = frame_length_ms * sample_rate // 1000
+         print("MIN SEG IS", self.min_seg)
+
+     def get_offset(self, frame_length_ms, frame_shift_ms):
+         offset_ms = 0
+         while offset_ms + frame_shift_ms < frame_length_ms:
+             offset_ms += frame_shift_ms
+         return offset_ms
+
+     def parse_model_params(self, model_params):
+         model_p = {
+             "frame_length_ms": 25,
+             "frame_shift_ms": 10,
+             "sample_rate": 16000,
+             "chunk_size_s": 0.64,
+         }
+         # get parameter configurations
+         for li in model_params.items():
+             key, value = li
+             true_value = value["string_value"]
+             if key not in model_p:
+                 continue
+             key_type = type(model_p[key])
+             if key_type == type(None):
+                 model_p[key] = true_value
+             else:
+                 model_p[key] = key_type(true_value)
+         return model_p
+
+     def execute(self, requests):
+         """`execute` must be implemented in every Python model. `execute`
+         function receives a list of pb_utils.InferenceRequest as the only
+         argument. This function is called when an inference is requested
+         for this model.
+         Parameters
+         ----------
+         requests : list
+             A list of pb_utils.InferenceRequest
+         Returns
+         -------
+         list
+             A list of pb_utils.InferenceResponse. The length of this list must
+             be the same as `requests`
+         """
+         total_waves = []
+         responses = []
+         batch_seqid = []
+         end_seqid = {}
+         for request in requests:
+             input0 = pb_utils.get_input_tensor_by_name(request, "wav")
+             # wavs = input0.as_numpy()[0]
+             wavs = from_dlpack(input0.to_dlpack())[0]
+
+             input1 = pb_utils.get_input_tensor_by_name(request, "wav_lens")
+             # wav_lens = input1.as_numpy()[0][0]
+             wav_lens = from_dlpack(input1.to_dlpack())[0]
+             in_start = pb_utils.get_input_tensor_by_name(request, "START")
+             start = in_start.as_numpy()[0][0]
+             in_ready = pb_utils.get_input_tensor_by_name(request, "READY")
+             ready = in_ready.as_numpy()[0][0]
+             in_corrid = pb_utils.get_input_tensor_by_name(request, "CORRID")
+             corrid = in_corrid.as_numpy()[0][0]
+             in_end = pb_utils.get_input_tensor_by_name(request, "END")
+             end = in_end.as_numpy()[0][0]
+             print(wavs.size(), wav_lens, ready, start, corrid, end)
+             if start:
+                 self.seq_feat[corrid] = Feat(
+                     corrid,
+                     self.offset_ms,
+                     self.sample_rate,
+                     self.first_chunk_size,
+                     self.frame_stride,
+                     self.device,
+                 )
+             if ready:
+                 self.seq_feat[corrid].add_wavs(wavs[0:wav_lens])
+
+             batch_seqid.append(corrid)
+             if end:
+                 end_seqid[corrid] = 1
+
+             # if not start
+             # check chunk ms size
+
+             wav = self.seq_feat[corrid].get_seg_wav() * 32768
+             if len(wav) < self.min_seg:
+                 temp = torch.zeros(
+                     self.min_seg, dtype=torch.float32, device=self.device
+                 )
+                 temp[0:len(wav)] = wav[:]
+                 wav = temp
+             total_waves.append(wav)
+
+         features = self.feature_extractor(total_waves)
+
+         batch_size = len(batch_seqid)
+         batch_speech = torch.zeros(
+             (batch_size, self.decoding_window, self.feature_size), dtype=self.dtype
+         )
+         batch_speech_lens = torch.zeros((batch_size, 1), dtype=torch.int32)
+         i = 0
+         for corrid, frames in zip(batch_seqid, features):
+             self.seq_feat[corrid].add_frames(frames)
+             r_frames = self.seq_feat[corrid].get_frames(self.decoding_window)
+             speech = batch_speech[i:i + 1]
+             speech_lengths = batch_speech_lens[i:i + 1]
+             i += 1
+             speech_lengths[0] = r_frames.size(0)
+             speech[0][0:r_frames.size(0)] = r_frames.to(speech.device)
+             # out_tensor0 = pb_utils.Tensor.from_dlpack("speech", to_dlpack(speech))
+             # out_tensor1 = pb_utils.Tensor.from_dlpack("speech_lengths",
+             #                                           to_dlpack(speech_lengths))
+             out_tensor0 = pb_utils.Tensor("speech", speech.numpy())
+             out_tensor1 = pb_utils.Tensor("speech_lengths", speech_lengths.numpy())
+             output_tensors = [out_tensor0, out_tensor1]
+             response = pb_utils.InferenceResponse(output_tensors=output_tensors)
+             responses.append(response)
+             if corrid in end_seqid:
+                 del self.seq_feat[corrid]
+             print(
+                 f"feature extractor results: corrid is {corrid}, speech is {speech.numpy()}, speech_lengths is {speech_lengths.numpy()}"
+             )
+         return responses
+
+     def finalize(self):
+         print("Remove feature extractor!")
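With the defaults above (25 ms frames, 10 ms shift, 16 kHz audio, 0.64 s chunks), a chunk is 10240 samples, which yields only 62 fbank frames, while the encoder expects a 67-frame decoding window; `initialize` therefore grows `first_chunk_size` one frame shift (160 samples) at a time until the first chunk alone covers the window. A minimal sketch of that arithmetic, assuming kaldifeat's snip-edges frame count `1 + (samples - window) // shift`:

# Minimal sketch of the first-chunk sizing in initialize(), assuming
# snip-edges framing: num_frames = 1 + (samples - window) // shift.
sample_rate = 16000
window = 25 * sample_rate // 1000   # 400 samples (frame_length_ms = 25)
shift = 10 * sample_rate // 1000    # 160 samples (frame_shift_ms = 10)
decoding_window = 67                # frames, from the "speech" output dims

def num_frames(samples: int) -> int:
    return 1 + (samples - window) // shift

first_chunk_size = int(0.64 * sample_rate)      # 10240 samples -> 62 frames
while num_frames(first_chunk_size) < decoding_window:
    first_chunk_size += shift                   # each step adds one frame
print(first_chunk_size, num_frames(first_chunk_size))  # 11040 67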
model_repo_stateful/feature_extractor/config.pbtxt ADDED
@@ -0,0 +1,98 @@
+
+ name: "feature_extractor"
+ backend: "python"
+ max_batch_size: 512
+
+ parameters [
+   {
+     key: "frame_length_ms",
+     value: { string_value: "25" }
+   },
+   {
+     key: "frame_shift_ms"
+     value: { string_value: "10" }
+   },
+   {
+     key: "sample_rate"
+     value: { string_value: "16000" }
+   },
+   {
+     key: "chunk_size_s",
+     value: { string_value: "0.64" }
+   }
+ ]
+ sequence_batching {
+   max_sequence_idle_microseconds: 5000000
+   oldest {
+     max_candidate_sequences: 512
+     preferred_batch_size: [ 32, 64, 128, 256 ]
+   }
+   control_input [
+     {
+       name: "START",
+       control [
+         {
+           kind: CONTROL_SEQUENCE_START
+           fp32_false_true: [0, 1]
+         }
+       ]
+     },
+     {
+       name: "READY"
+       control [
+         {
+           kind: CONTROL_SEQUENCE_READY
+           fp32_false_true: [0, 1]
+         }
+       ]
+     },
+     {
+       name: "CORRID",
+       control [
+         {
+           kind: CONTROL_SEQUENCE_CORRID
+           data_type: TYPE_UINT64
+         }
+       ]
+     },
+     {
+       name: "END",
+       control [
+         {
+           kind: CONTROL_SEQUENCE_END
+           fp32_false_true: [0, 1]
+         }
+       ]
+     }
+   ]
+ }
+ input [
+   {
+     name: "wav"
+     data_type: TYPE_FP32
+     dims: [-1]
+   },
+   {
+     name: "wav_lens"
+     data_type: TYPE_INT32
+     dims: [1]
+   }
+ ]
+ output [
+   {
+     name: "speech"
+     data_type: TYPE_FP16 # FP32
+     dims: [67, 80]
+   },
+   {
+     name: "speech_lengths"
+     data_type: TYPE_INT32
+     dims: [1]
+   }
+ ]
+ instance_group [
+   {
+     count: 2
+     kind: KIND_GPU
+   }
+ ]
model_repo_stateful/feature_extractor/config_template.pbtxt ADDED
@@ -0,0 +1,111 @@
+ # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ name: "feature_extractor"
+ backend: "python"
+ max_batch_size: 512
+
+ parameters [
+   {
+     key: "frame_length_ms",
+     value: { string_value: "#frame_length" }
+   },
+   {
+     key: "frame_shift_ms"
+     value: { string_value: "#frame_shift" }
+   },
+   {
+     key: "sample_rate"
+     value: { string_value: "#sample_rate" }
+   },
+   {
+     key: "chunk_size_s",
+     value: { string_value: "#chunk_size_in_seconds" }
+   }
+ ]
+ sequence_batching {
+   max_sequence_idle_microseconds: 5000000
+   oldest {
+     max_candidate_sequences: 512
+     preferred_batch_size: [ 32, 64, 128, 256 ]
+   }
+   control_input [
+     {
+       name: "START",
+       control [
+         {
+           kind: CONTROL_SEQUENCE_START
+           fp32_false_true: [0, 1]
+         }
+       ]
+     },
+     {
+       name: "READY"
+       control [
+         {
+           kind: CONTROL_SEQUENCE_READY
+           fp32_false_true: [0, 1]
+         }
+       ]
+     },
+     {
+       name: "CORRID",
+       control [
+         {
+           kind: CONTROL_SEQUENCE_CORRID
+           data_type: TYPE_UINT64
+         }
+       ]
+     },
+     {
+       name: "END",
+       control [
+         {
+           kind: CONTROL_SEQUENCE_END
+           fp32_false_true: [0, 1]
+         }
+       ]
+     }
+   ]
+ }
+ input [
+   {
+     name: "wav"
+     data_type: TYPE_FP32
+     dims: [-1]
+   },
+   {
+     name: "wav_lens"
+     data_type: TYPE_INT32
+     dims: [1]
+   }
+ ]
+ output [
+   {
+     name: "speech"
+     data_type: TYPE_#DTYPE # FP32
+     dims: [#decoding_window, #num_mel_bins]
+   },
+   {
+     name: "speech_lengths"
+     data_type: TYPE_INT32
+     dims: [1]
+   }
+ ]
+ instance_group [
+   {
+     count: 2
+     kind: KIND_GPU
+   }
+ ]
model_repo_stateful/streaming_wenet/1/.gitignore ADDED
File without changes
model_repo_stateful/streaming_wenet/config.pbtxt ADDED
@@ -0,0 +1,102 @@
+
+ name: "streaming_wenet"
+ platform: "ensemble"
+ max_batch_size: 512 #MAX_BATCH
+
+ input [
+   {
+     name: "WAV"
+     data_type: TYPE_FP32
+     dims: [-1]
+   },
+   {
+     name: "WAV_LENS"
+     data_type: TYPE_INT32
+     dims: [1]
+   }
+ ]
+
+ output [
+   {
+     name: "TRANSCRIPTS"
+     data_type: TYPE_STRING
+     dims: [1]
+   }
+ ]
+
+ ensemble_scheduling {
+   step [
+     {
+       model_name: "feature_extractor"
+       model_version: -1
+       input_map {
+         key: "wav"
+         value: "WAV"
+       }
+       input_map {
+         key: "wav_lens"
+         value: "WAV_LENS"
+       }
+       output_map {
+         key: "speech"
+         value: "SPEECH"
+       }
+       output_map {
+         key: "speech_lengths"
+         value: "SPEECH_LENGTHS"
+       }
+     },
+     {
+       model_name: "encoder"
+       model_version: -1
+       input_map {
+         key: "chunk_xs"
+         value: "SPEECH"
+       }
+       input_map {
+         key: "chunk_lens"
+         value: "SPEECH_LENGTHS"
+       }
+       output_map {
+         key: "log_probs"
+         value: "LOG_PROBS"
+       }
+       output_map {
+         key: "log_probs_idx"
+         value: "LOG_PROBS_IDX"
+       }
+       output_map {
+         key: "chunk_out"
+         value: "CHUNK_OUT"
+       }
+       output_map {
+         key: "chunk_out_lens"
+         value: "CHUNK_OUT_LENS"
+       }
+     },
+     {
+       model_name: "wenet"
+       model_version: -1
+       input_map {
+         key: "log_probs"
+         value: "LOG_PROBS"
+       }
+       input_map {
+         key: "log_probs_idx"
+         value: "LOG_PROBS_IDX"
+       }
+       input_map {
+         key: "chunk_out"
+         value: "CHUNK_OUT"
+       }
+       input_map {
+         key: "chunk_out_lens"
+         value: "CHUNK_OUT_LENS"
+       }
+       output_map {
+         key: "OUTPUT0"
+         value: "TRANSCRIPTS"
+       }
+     }
+   ]
+ }
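Given the ensemble's WAV/WAV_LENS inputs, the TRANSCRIPTS output, and the sequence batchers behind it, a client streams fixed-size chunks under one correlation ID and marks the first and last chunk. A minimal sketch using tritonclient's gRPC API (the server address, chunk size, and synthetic audio are assumptions; a real client would send `first_chunk_size` samples first and `chunk_size` afterwards):

# Minimal client sketch for the "streaming_wenet" ensemble (assumed server
# address and chunk sizes; audio is a stand-in for real 16 kHz speech).
import numpy as np
import tritonclient.grpc as grpcclient

client = grpcclient.InferenceServerClient("localhost:8001")
audio = np.zeros(16000 * 2, dtype=np.float32)  # 2 s of silence as a stand-in
chunk = 11040                                  # first_chunk_size of the fbank model
seq_id = 10086                                 # arbitrary correlation ID

offsets = list(range(0, len(audio), chunk))
for i, off in enumerate(offsets):
    seg = audio[off:off + chunk]
    wav = grpcclient.InferInput("WAV", [1, len(seg)], "FP32")
    wav.set_data_from_numpy(seg.reshape(1, -1))
    lens = grpcclient.InferInput("WAV_LENS", [1, 1], "INT32")
    lens.set_data_from_numpy(np.array([[len(seg)]], dtype=np.int32))
    result = client.infer(
        "streaming_wenet",
        inputs=[wav, lens],
        sequence_id=seq_id,
        sequence_start=(i == 0),
        sequence_end=(i == len(offsets) - 1),
    )
    print(result.as_numpy("TRANSCRIPTS"))  # partial transcript per chunk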
model_repo_stateful/streaming_wenet/config_template.pbtxt ADDED
@@ -0,0 +1,115 @@
+ # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ name: "streaming_wenet"
+ platform: "ensemble"
+ max_batch_size: 512 #MAX_BATCH
+
+ input [
+   {
+     name: "WAV"
+     data_type: TYPE_FP32
+     dims: [-1]
+   },
+   {
+     name: "WAV_LENS"
+     data_type: TYPE_INT32
+     dims: [1]
+   }
+ ]
+
+ output [
+   {
+     name: "TRANSCRIPTS"
+     data_type: TYPE_STRING
+     dims: [1]
+   }
+ ]
+
+ ensemble_scheduling {
+   step [
+     {
+       model_name: "feature_extractor"
+       model_version: -1
+       input_map {
+         key: "wav"
+         value: "WAV"
+       }
+       input_map {
+         key: "wav_lens"
+         value: "WAV_LENS"
+       }
+       output_map {
+         key: "speech"
+         value: "SPEECH"
+       }
+       output_map {
+         key: "speech_lengths"
+         value: "SPEECH_LENGTHS"
+       }
+     },
+     {
+       model_name: "encoder"
+       model_version: -1
+       input_map {
+         key: "chunk_xs"
+         value: "SPEECH"
+       }
+       input_map {
+         key: "chunk_lens"
+         value: "SPEECH_LENGTHS"
+       }
+       output_map {
+         key: "log_probs"
+         value: "LOG_PROBS"
+       }
+       output_map {
+         key: "log_probs_idx"
+         value: "LOG_PROBS_IDX"
+       }
+       output_map {
+         key: "chunk_out"
+         value: "CHUNK_OUT"
+       }
+       output_map {
+         key: "chunk_out_lens"
+         value: "CHUNK_OUT_LENS"
+       }
+     },
+     {
+       model_name: "wenet"
+       model_version: -1
+       input_map {
+         key: "log_probs"
+         value: "LOG_PROBS"
+       }
+       input_map {
+         key: "log_probs_idx"
+         value: "LOG_PROBS_IDX"
+       }
+       input_map {
+         key: "chunk_out"
+         value: "CHUNK_OUT"
+       }
+       input_map {
+         key: "chunk_out_lens"
+         value: "CHUNK_OUT_LENS"
+       }
+       output_map {
+         key: "OUTPUT0"
+         value: "TRANSCRIPTS"
+       }
+     }
+   ]
+ }
model_repo_stateful/wenet/1/__pycache__/model.cpython-38.pyc ADDED
Binary file (4.26 kB)
model_repo_stateful/wenet/1/__pycache__/wenet_onnx_model.cpython-38.pyc ADDED
Binary file (6.77 kB)
model_repo_stateful/wenet/1/model.py ADDED
@@ -0,0 +1,180 @@
+ # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import numpy as np
+ import json
+ import torch
+ from swig_decoders import PathTrie, TrieVector
+
+ # triton_python_backend_utils is available in every Triton Python model. You
+ # need to use this module to create inference requests and responses. It also
+ # contains some utility functions for extracting information from model_config
+ # and converting Triton input/output types to numpy types.
+ import triton_python_backend_utils as pb_utils
+ from wenet_onnx_model import WenetModel
+
+ from torch.utils.dlpack import from_dlpack
+
+ class TritonPythonModel:
+     """Your Python model must use the same class name. Every Python model
+     that is created must have "TritonPythonModel" as the class name.
+     """
+
+     def initialize(self, args):
+         """`initialize` is called only once when the model is being loaded.
+         Implementing `initialize` function is optional. This function allows
+         the model to initialize any state associated with this model.
+         Parameters
+         ----------
+         args : dict
+           Both keys and values are strings. The dictionary keys and values are:
+           * model_config: A JSON string containing the model configuration
+           * model_instance_kind: A string containing model instance kind
+           * model_instance_device_id: A string containing model instance device ID
+           * model_repository: Model repository path
+           * model_version: Model version
+           * model_name: Model name
+         """
+
+         # You must parse model_config. JSON string is not parsed here
+         self.model_config = model_config = json.loads(args['model_config'])
+
+         # get device
+         if args["model_instance_kind"] == "GPU":
+             self.device = 'cuda'
+         else:
+             self.device = 'cpu'
+
+         # get parameter configurations
+         self.model = WenetModel(self.model_config, self.device)
+
+         # Get OUTPUT0 configuration
+         output0_config = pb_utils.get_output_config_by_name(
+             model_config, "OUTPUT0")
+
+         # Convert Triton types to numpy types
+         self.output0_dtype = pb_utils.triton_string_to_numpy(
+             output0_config['data_type'])
+
+         # used to record the state of every sequence
+         self.seq_states = {}
+         print("Finish Init")
+
+     def execute(self, requests):
+         """
+         requests : list
+             A list of pb_utils.InferenceRequest
+         Returns
+         -------
+         list
+             A list of pb_utils.InferenceResponse. The length of this list must
+             be the same as `requests`
+         """
+         responses = []
+         batch_log_probs, batch_log_probs_idx, batch_len, batch_states = [], [], [], []
+         cur_encoder_out = []
+
+         batch_encoder_hist = []
+         batch_start = []
+
+         trieVector = TrieVector()
+
+         rescore_index = {}
+         batch_idx2_corrid = {}
+
+         # Every Python backend must iterate over every one of the requests
+         # and create a pb_utils.InferenceResponse for each of them.
+         batch_idx = 0
+         for request in requests:
+             # Get INPUT0
+             in_0 = pb_utils.get_input_tensor_by_name(request, "log_probs")
+             batch_log_probs.append(in_0.as_numpy()[0])
+             in_1 = pb_utils.get_input_tensor_by_name(request, "log_probs_idx")
+             batch_log_probs_idx.append(in_1.as_numpy()[0])
+             if self.model.rescoring:
+                 in_2 = pb_utils.get_input_tensor_by_name(request, "chunk_out")
+                 # important to clone this tensor; otherwise
+                 # it will be released after one inference
+                 in_2 = from_dlpack(in_2.to_dlpack()).clone()
+                 cur_encoder_out.append(in_2[0])
+                 in_3 = pb_utils.get_input_tensor_by_name(request, "chunk_out_lens")
+                 batch_len.append(in_3.as_numpy())
+
+             in_start = pb_utils.get_input_tensor_by_name(request, "START")
+             start = in_start.as_numpy()[0][0]
+
+             if start:
+                 batch_start.append(True)
+             else:
+                 batch_start.append(False)
+
+             in_ready = pb_utils.get_input_tensor_by_name(request, "READY")
+             ready = in_ready.as_numpy()[0][0]
+
+             in_corrid = pb_utils.get_input_tensor_by_name(request, "CORRID")
+             corrid = in_corrid.as_numpy()[0][0]
+
+             in_end = pb_utils.get_input_tensor_by_name(request, "END")
+             end = in_end.as_numpy()[0][0]
+
+             if start and ready:
+                 # initialize states
+                 encoder_out = self.model.generate_init_cache()
+                 root = PathTrie()
+                 # register this sequence
+                 self.seq_states[corrid] = [root, encoder_out]
+
+             if end and ready:
+                 rescore_index[batch_idx] = 1
+
+             if ready:
+                 root, encoder_out = self.seq_states[corrid]
+                 trieVector.append(root)
+                 batch_idx2_corrid[batch_idx] = corrid
+                 batch_encoder_hist.append(encoder_out)
+
+             batch_idx += 1
+
+         batch_states = [trieVector, batch_start, batch_encoder_hist, cur_encoder_out]
+         res_sents, new_states = self.model.infer(batch_log_probs, batch_log_probs_idx,
+                                                  batch_len, rescore_index, batch_states)
+         cur_encoder_out = new_states
+         for i in range(len(res_sents)):
+             sent = np.array(res_sents[i])
+             out_tensor_0 = pb_utils.Tensor("OUTPUT0", sent.astype(self.output0_dtype))
+             response = pb_utils.InferenceResponse(output_tensors=[out_tensor_0])
+             responses.append(response)
+             corr = batch_idx2_corrid[i]
+             if i in rescore_index:
+                 # this sequence ends, remove it
+                 del self.seq_states[corr]
+             else:
+                 if self.model.rescoring:
+                     if self.seq_states[corr][1] is None:
+                         self.seq_states[corr][1] = cur_encoder_out[i]
+                     else:
+                         new_hist = torch.cat([self.seq_states[corr][1],
+                                               cur_encoder_out[i]], axis=0)
+                         self.seq_states[corr][1] = new_hist
+
+         assert len(requests) == len(responses)
+         return responses
+
+     def finalize(self):
+         """`finalize` is called only once when the model is being unloaded.
+         Implementing `finalize` function is OPTIONAL. This function allows
+         the model to perform any necessary clean ups before exit.
+         """
+         print('Cleaning up...')
+         del self.model
@@ -0,0 +1,277 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ import multiprocessing
17
+ import numpy as np
18
+ import os
19
+ import torch
20
+ import triton_python_backend_utils as pb_utils
21
+ from torch.utils.dlpack import to_dlpack, from_dlpack
22
+ from swig_decoders import ctc_beam_search_decoder_batch, Scorer, map_batch
23
+
24
+ class WenetModel(object):
25
+ def __init__(self, model_config, device):
26
+ params = self.parse_model_parameters(model_config['parameters'])
27
+
28
+ self.device = device
29
+ print("Using device", device)
30
+ print("Successfully load model !")
31
+
32
+ # load vocabulary
33
+ ret = self.load_vocab(params["vocab_path"])
34
+ self.id2vocab, self.vocab, space_id, blank_id, sos_eos = ret
35
+ self.space_id = space_id if space_id else -1
36
+ self.blank_id = blank_id if blank_id else 0
37
+ self.eos = self.sos = sos_eos if sos_eos else len(self.vocab) - 1
38
+ print("Successfully load vocabulary !")
39
+ self.params = params
40
+
41
+ # beam search setting
42
+ self.beam_size = params.get("beam_size")
43
+ self.cutoff_prob = params.get("cutoff_prob")
44
+
45
+ # language model
46
+ lm_path = params.get("lm_path", None)
47
+ alpha, beta = params.get('alpha'), params.get('beta')
48
+ self.scorer = None
49
+ if os.path.exists(lm_path):
50
+ self.scorer = Scorer(alpha, beta, lm_path, self.vocab)
51
+
52
+ self.bidecoder = params.get('bidecoder')
53
+ # rescore setting
54
+ self.rescoring = params.get("rescoring", 0)
55
+ print("Using rescoring:", bool(self.rescoring))
56
+ print("Successfully load all parameters!")
57
+
58
+ log_probs_config = pb_utils.get_input_config_by_name(
59
+ model_config, "log_probs")
60
+ # Convert Triton types to numpy types
61
+ log_probs_dtype = pb_utils.triton_string_to_numpy(
62
+ log_probs_config['data_type'])
63
+
64
+ if log_probs_dtype == np.float32:
65
+ self.dtype = torch.float32
66
+ else:
67
+ self.dtype = torch.float16
68
+
69
+ def generate_init_cache(self):
70
+ encoder_out = None
71
+ return encoder_out
72
+
73
+ def load_vocab(self, vocab_file):
74
+ """
75
+ load lang_char.txt
76
+ """
77
+ id2vocab = {}
78
+ space_id, blank_id, sos_eos = None, None, None
79
+ with open(vocab_file, "r", encoding="utf-8") as f:
80
+ for line in f:
81
+ line = line.strip()
82
+ char, id = line.split()
83
+ id2vocab[int(id)] = char
84
+ if char == " ":
85
+ space_id = int(id)
86
+ elif char == "<blank>":
87
+ blank_id = int(id)
88
+ elif char == "<sos/eos>":
89
+ sos_eos = int(id)
90
+ vocab = [0] * len(id2vocab)
91
+ for id, char in id2vocab.items():
92
+ vocab[id] = char
93
+ return (id2vocab, vocab, space_id, blank_id, sos_eos)
94
+
95
+ def parse_model_parameters(self, model_parameters):
96
+ model_p = {"beam_size": 10,
97
+ "cutoff_prob": 0.999,
98
+ "vocab_path": None,
99
+ "lm_path": None,
100
+ "alpha": 2.0,
101
+ "beta": 1.0,
102
+ "rescoring": 0,
103
+ "bidecoder": 1}
104
+ # get parameter configurations
105
+ for li in model_parameters.items():
106
+ key, value = li
107
+ true_value = value["string_value"]
108
+ if key not in model_p:
109
+ continue
110
+ key_type = type(model_p[key])
111
+ if key_type == type(None):
112
+ model_p[key] = true_value
113
+ else:
114
+ model_p[key] = key_type(true_value)
115
+ assert model_p["vocab_path"] is not None
116
+ return model_p
117
+
118
+ def infer(self, batch_log_probs, batch_log_probs_idx,
119
+ seq_lens, rescore_index, batch_states):
120
+ """
121
+ batch_states = [trieVector, batch_start,
122
+ batch_encoder_hist, cur_encoder_out]
123
+ """
124
+ trie_vector, batch_start, batch_encoder_hist, cur_encoder_out = batch_states
125
+ num_processes = min(multiprocessing.cpu_count(), len(batch_log_probs))
126
+
127
+ score_hyps = self.batch_ctc_prefix_beam_search_cpu(batch_log_probs,
128
+ batch_log_probs_idx,
129
+ seq_lens,
130
+ trie_vector,
131
+ batch_start,
132
+ self.beam_size,
133
+ self.blank_id,
134
+ self.space_id,
135
+ self.cutoff_prob,
136
+ num_processes,
137
+ self.scorer)
138
+
139
+ if self.rescoring and len(rescore_index) != 0:
140
+ # find the end of sequence
141
+ rescore_encoder_hist = []
142
+ rescore_encoder_lens = []
143
+ rescore_hyps = []
144
+ res_idx = list(rescore_index.keys())
145
+ max_length = -1
146
+ for idx in res_idx:
147
+ hist_enc = batch_encoder_hist[idx]
148
+ if hist_enc is None:
149
+ cur_enc = cur_encoder_out[idx]
150
+ else:
151
+ cur_enc = torch.cat([hist_enc, cur_encoder_out[idx]], axis=0)
152
+ rescore_encoder_hist.append(cur_enc)
153
+ cur_mask_len = int(len(hist_enc) + seq_lens[idx])
154
+ rescore_encoder_lens.append(cur_mask_len)
155
+ rescore_hyps.append(score_hyps[idx])
156
+ if cur_enc.shape[0] > max_length:
157
+ max_length = cur_enc.shape[0]
158
+ best_index = self.batch_rescoring(rescore_hyps, rescore_encoder_hist,
159
+ rescore_encoder_lens, max_length)
160
+
161
+ best_sent = []
162
+ j = 0
163
+ for idx, li in enumerate(score_hyps):
164
+ if idx in rescore_index and self.rescoring:
165
+ best_sent.append(li[best_index[j]][1])
166
+ j += 1
167
+ else:
168
+ best_sent.append(li[0][1])
169
+
170
+ final_result = map_batch(best_sent, self.vocab, num_processes)
171
+
172
+ return final_result, cur_encoder_out
173
+
174
+ def batch_ctc_prefix_beam_search_cpu(self, batch_log_probs_seq,
175
+ batch_log_probs_idx,
176
+ batch_len, batch_root,
177
+ batch_start, beam_size,
178
+ blank_id, space_id,
179
+ cutoff_prob, num_processes,
180
+ scorer):
181
+ """
182
+ Return: Batch x Beam_size elements, each element is a tuple
183
+ (score, list of ids),
184
+ """
185
+
186
+ batch_len_list = batch_len
187
+ batch_log_probs_seq_list = []
188
+ batch_log_probs_idx_list = []
189
+ for i in range(len(batch_len_list)):
190
+ cur_len = int(batch_len_list[i])
191
+ batch_log_probs_seq_list.append(batch_log_probs_seq[i][0:cur_len].tolist())
192
+ batch_log_probs_idx_list.append(batch_log_probs_idx[i][0:cur_len].tolist())
193
+ score_hyps = ctc_beam_search_decoder_batch(batch_log_probs_seq_list,
194
+ batch_log_probs_idx_list,
195
+ batch_root,
196
+ batch_start,
197
+ beam_size,
198
+ num_processes,
199
+ blank_id,
200
+ space_id,
201
+ cutoff_prob,
202
+ scorer)
203
+ return score_hyps
204
+
205
+ def batch_rescoring(self, score_hyps, hist_enc, hist_mask_len, max_len):
206
+ """
207
+ score_hyps: [((ctc_score, (id1, id2, id3, ....)), (), ...), ....]
208
+ hist_enc: [len1xF, len2xF, .....]
209
+ hist_mask: [1x1xlen1, 1x1xlen2]
210
+ return bzx1 best_index
211
+ """
212
+ bz = len(hist_enc)
213
+ f = hist_enc[0].shape[-1]
214
+ beam_size = self.beam_size
215
+ encoder_lens = np.zeros((bz, 1), dtype=np.int32)
216
+ encoder_out = torch.zeros((bz, max_len, f), dtype=self.dtype)
217
+ hyps = []
218
+ ctc_score = torch.zeros((bz, beam_size), dtype=self.dtype)
219
+ max_seq_len = 0
220
+ for i in range(bz):
221
+ cur_len = hist_enc[i].shape[0]
222
+ encoder_out[i, 0:cur_len] = hist_enc[i]
223
+ encoder_lens[i, 0] = hist_mask_len[i]
224
+
225
+ # process candidate
226
+ if len(score_hyps[i]) < beam_size:
227
+ to_append = (beam_size - len(score_hyps[i])) * [(-10000, ())]
228
+ score_hyps[i] = list(score_hyps[i]) + to_append
229
+ for idx, c in enumerate(score_hyps[i]):
230
+ score, idlist = c
231
+ if score < -10000:
232
+ score = -10000
233
+ ctc_score[i][idx] = score
234
+ hyps.append(list(idlist))
235
+ if len(hyps[-1]) > max_seq_len:
236
+ max_seq_len = len(hyps[-1])
237
+
238
+ max_seq_len += 2
239
+ hyps_pad_sos_eos = np.ones((bz, beam_size, max_seq_len), dtype=np.int64)
240
+ hyps_pad_sos_eos = hyps_pad_sos_eos * self.eos # fill eos
241
+ if self.bidecoder:
242
+ r_hyps_pad_sos_eos = np.ones((bz, beam_size, max_seq_len), dtype=np.int64)
243
+ r_hyps_pad_sos_eos = r_hyps_pad_sos_eos * self.eos
244
+
245
+ hyps_lens_sos = np.ones((bz, beam_size), dtype=np.int32)
246
+ bz_id = 0
247
+ for idx, cand in enumerate(hyps):
248
+ bz_id = idx // beam_size
249
+ length = len(cand) + 2
250
+ bz_offset = idx % beam_size
251
+ pad_cand = [self.sos] + cand + [self.eos]
252
+ hyps_pad_sos_eos[bz_id][bz_offset][0 : length] = pad_cand
253
+ if self.bidecoder:
254
+ r_pad_cand = [self.sos] + cand[::-1] + [self.eos]
255
+ r_hyps_pad_sos_eos[bz_id][bz_offset][0:length] = r_pad_cand
256
+ hyps_lens_sos[bz_id][idx % beam_size] = len(cand) + 1
257
+ in0 = pb_utils.Tensor.from_dlpack("encoder_out", to_dlpack(encoder_out))
258
+ in1 = pb_utils.Tensor("encoder_out_lens", encoder_lens)
259
+ in2 = pb_utils.Tensor("hyps_pad_sos_eos", hyps_pad_sos_eos)
260
+ in3 = pb_utils.Tensor("hyps_lens_sos", hyps_lens_sos)
261
+ input_tensors = [in0, in1, in2, in3]
262
+ if self.bidecoder:
263
+ in4 = pb_utils.Tensor("r_hyps_pad_sos_eos", r_hyps_pad_sos_eos)
264
+ input_tensors.append(in4)
265
+ in5 = pb_utils.Tensor.from_dlpack("ctc_score", to_dlpack(ctc_score))
266
+ input_tensors.append(in5)
267
+ request = pb_utils.InferenceRequest(model_name='decoder',
268
+ requested_output_names=['best_index'],
269
+ inputs=input_tensors)
270
+ response = request.exec()
271
+ best_index = pb_utils.get_output_tensor_by_name(response, 'best_index')
272
+ best_index = from_dlpack(best_index.to_dlpack()).clone()
273
+ best_index = best_index.numpy()[:, 0]
274
+ return best_index
275
+
276
+ def __del__(self):
277
+ print("remove wenet model")
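The padding layout `batch_rescoring` builds for the decoder call is easiest to see on a tiny example: every beam candidate becomes [sos, ids..., eos], right-padded with eos to the longest candidate plus two, and its recorded length is len(ids) + 1 (sos plus tokens, excluding the final eos). A minimal sketch with hypothetical token ids and sos = eos = 4:

# Minimal sketch of the hyps_pad_sos_eos / hyps_lens_sos layout
# (hypothetical ids; one utterance with a beam of two candidates, sos = eos = 4).
import numpy as np

sos = eos = 4
hyps = [(1, 2, 3), (1, 2)]                 # beam candidates as token-id tuples
max_seq_len = max(len(h) for h in hyps) + 2

pad = np.full((1, len(hyps), max_seq_len), eos, dtype=np.int64)
lens = np.ones((1, len(hyps)), dtype=np.int32)
for j, cand in enumerate(hyps):
    pad[0, j, 0:len(cand) + 2] = [sos, *cand, eos]
    lens[0, j] = len(cand) + 1             # sos + tokens, excluding trailing eos
print(pad[0].tolist())   # [[4, 1, 2, 3, 4], [4, 1, 2, 4, 4]]
print(lens[0].tolist())  # [4, 3]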
model_repo_stateful/wenet/config.pbtxt ADDED
@@ -0,0 +1,126 @@
+
+ name: "wenet"
+ backend: "python"
+ max_batch_size: 512
+
+ sequence_batching {
+   max_sequence_idle_microseconds: 5000000
+   oldest {
+     max_candidate_sequences: 1024
+     preferred_batch_size: [32, 64, 128, 256]
+   }
+   control_input [
+     {
+       name: "START",
+       control [
+         {
+           kind: CONTROL_SEQUENCE_START
+           fp32_false_true: [0, 1]
+         }
+       ]
+     },
+     {
+       name: "READY"
+       control [
+         {
+           kind: CONTROL_SEQUENCE_READY
+           fp32_false_true: [0, 1]
+         }
+       ]
+     },
+     {
+       name: "CORRID",
+       control [
+         {
+           kind: CONTROL_SEQUENCE_CORRID
+           data_type: TYPE_UINT64
+         }
+       ]
+     },
+     {
+       name: "END",
+       control [
+         {
+           kind: CONTROL_SEQUENCE_END
+           fp32_false_true: [0, 1]
+         }
+       ]
+     }
+   ]
+ }
+
+ parameters [
+   {
+     key: "beam_size",
+     value: { string_value: "10" }
+   },
+   {
+     key: "cutoff_prob",
+     value: { string_value: "0.9999" }
+   },
+   {
+     key: "alpha",
+     value: { string_value: "2" }
+   },
+   {
+     key: "beta",
+     value: { string_value: "1" }
+   },
+   {
+     key: "vocab_path",
+     value: { string_value: "/ws/onnx_model/units.txt" }
+   },
+   {
+     key: "lm_path",
+     value: { string_value: "/ws/onnx_model/lm.bin" }
+   },
+   {
+     key: "bidecoder",
+     value: { string_value: "0" }
+   },
+   {
+     key: "rescoring",
+     value: { string_value: "1" }
+   },
+   {
+     key: "FORCE_CPU_ONLY_INPUT_TENSORS",
+     value: { string_value: "yes" }
+   }
+ ]
+
+ input [
+   {
+     name: "log_probs"
+     data_type: TYPE_FP16
+     dims: [-1, 10] # [-1, beam_size]
+   },
+   {
+     name: "log_probs_idx"
+     data_type: TYPE_INT64
+     dims: [-1, 10] # [-1, beam_size]
+   },
+   {
+     name: "chunk_out"
+     data_type: TYPE_FP16
+     dims: [-1, -1]
+   },
+   {
+     name: "chunk_out_lens"
+     data_type: TYPE_INT32
+     dims: [1]
+   }
+ ]
+ output [
+   {
+     name: "OUTPUT0"
+     data_type: TYPE_STRING
+     dims: [1]
+     reshape { shape: [] }
+   }
+ ]
+ instance_group [
+   {
+     count: 2
+     kind: KIND_CPU
+   }
+ ]
model_repo_stateful/wenet/config_template.pbtxt ADDED
@@ -0,0 +1,139 @@
+ # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ name: "wenet"
+ backend: "python"
+ max_batch_size: 512
+
+ sequence_batching {
+   max_sequence_idle_microseconds: 5000000
+   oldest {
+     max_candidate_sequences: 1024
+     preferred_batch_size: [32, 64, 128, 256]
+   }
+   control_input [
+     {
+       name: "START",
+       control [
+         {
+           kind: CONTROL_SEQUENCE_START
+           fp32_false_true: [0, 1]
+         }
+       ]
+     },
+     {
+       name: "READY"
+       control [
+         {
+           kind: CONTROL_SEQUENCE_READY
+           fp32_false_true: [0, 1]
+         }
+       ]
+     },
+     {
+       name: "CORRID",
+       control [
+         {
+           kind: CONTROL_SEQUENCE_CORRID
+           data_type: TYPE_UINT64
+         }
+       ]
+     },
+     {
+       name: "END",
+       control [
+         {
+           kind: CONTROL_SEQUENCE_END
+           fp32_false_true: [0, 1]
+         }
+       ]
+     }
+   ]
+ }
+
+ parameters [
+   {
+     key: "beam_size",
+     value: { string_value: "#beam_size" }
+   },
+   {
+     key: "cutoff_prob",
+     value: { string_value: "0.9999" }
+   },
+   {
+     key: "alpha",
+     value: { string_value: "2" }
+   },
+   {
+     key: "beta",
+     value: { string_value: "1" }
+   },
+   {
+     key: "vocab_path",
+     value: { string_value: "/ws/onnx_model/units.txt" }
+   },
+   {
+     key: "lm_path",
+     value: { string_value: "/ws/onnx_model/lm.bin" }
+   },
+   {
+     key: "bidecoder",
+     value: { string_value: "#bidecoder" }
+   },
+   {
+     key: "rescoring",
+     value: { string_value: "1" }
+   },
+   {
+     key: "FORCE_CPU_ONLY_INPUT_TENSORS",
+     value: { string_value: "yes" }
+   }
+ ]
+
+ input [
+   {
+     name: "log_probs"
+     data_type: TYPE_#DTYPE
+     dims: [-1, #beam_size]
+   },
+   {
+     name: "log_probs_idx"
+     data_type: TYPE_INT64
+     dims: [-1, #beam_size]
+   },
+   {
+     name: "chunk_out"
+     data_type: TYPE_#DTYPE
+     dims: [-1, -1]
+   },
+   {
+     name: "chunk_out_lens"
+     data_type: TYPE_INT32
+     dims: [1]
+   }
+ ]
+ output [
+   {
+     name: "OUTPUT0"
+     data_type: TYPE_STRING
+     dims: [1]
+     reshape { shape: [] }
+   }
+ ]
+ instance_group [
+   {
+     count: 2
+     kind: KIND_CPU
+   }
+ ]