alibabasglab committed on
Commit
8ca20ba
·
verified ·
1 Parent(s): ed2aa07

Update utils/decode.py

Browse files
Files changed (1) hide show
  1. utils/decode.py +4 -4
utils/decode.py CHANGED
@@ -67,7 +67,7 @@ def decode_one_audio_mossformer2_ss_16k(model, device, inputs, args):
67
  """
68
  out = [] # Initialize the list to store outputs
69
  decode_do_segment = False # Flag to determine if segmentation is needed
70
- window = args.sampling_rate * args.decode_window # Decoding window length
71
  stride = int(window * 0.75) # Decoding stride if segmentation is used
72
  b, t = inputs.shape # Get batch size and input length
73
 
@@ -142,7 +142,7 @@ def decode_one_audio_frcrn_se_16k(model, device, inputs, args):
142
  """
143
  decode_do_segment = False # Flag to determine if segmentation is needed
144
 
145
- window = args.sampling_rate * args.decode_window # Decoding window length
146
  stride = int(window * 0.75) # Decoding stride for segmenting the input
147
  b, t = inputs.shape # Get batch size (b) and input length (t)
148
 
@@ -210,7 +210,7 @@ def decode_one_audio_mossformergan_se_16k(model, device, inputs, args):
210
  numpy.ndarray: The decoded audio output, which has been enhanced by the model.
211
  """
212
  decode_do_segment = False # Flag to determine if segmentation is needed
213
- window = args.sampling_rate * args.decode_window # Decoding window length
214
  stride = int(window * 0.75) # Decoding stride for segmenting the input
215
  b, t = inputs.shape # Get batch size (b) and input length (t)
216
 
@@ -274,7 +274,7 @@ def _decode_one_audio_mossformergan_se_16k(model, device, inputs, norm_factor, a
274
  """
275
  input_len = inputs.size(-1) # Get the length of the input audio
276
  nframe = int(np.ceil(input_len / args.win_inc)) # Calculate the number of frames based on window increment
277
- padded_len = nframe * args.win_inc # Calculate the padded length to fit the model
278
  padding_len = padded_len - input_len # Determine how much padding is needed
279
 
280
  # Pad the input audio with the beginning of the input
 
67
  """
68
  out = [] # Initialize the list to store outputs
69
  decode_do_segment = False # Flag to determine if segmentation is needed
70
+ window = int(args.sampling_rate * args.decode_window) # Decoding window length
71
  stride = int(window * 0.75) # Decoding stride if segmentation is used
72
  b, t = inputs.shape # Get batch size and input length
73
 
 
142
  """
143
  decode_do_segment = False # Flag to determine if segmentation is needed
144
 
145
+ window = int(args.sampling_rate * args.decode_window) # Decoding window length
146
  stride = int(window * 0.75) # Decoding stride for segmenting the input
147
  b, t = inputs.shape # Get batch size (b) and input length (t)
148
 
 
210
  numpy.ndarray: The decoded audio output, which has been enhanced by the model.
211
  """
212
  decode_do_segment = False # Flag to determine if segmentation is needed
213
+ window = int(args.sampling_rate * args.decode_window) # Decoding window length
214
  stride = int(window * 0.75) # Decoding stride for segmenting the input
215
  b, t = inputs.shape # Get batch size (b) and input length (t)
216
 
 
274
  """
275
  input_len = inputs.size(-1) # Get the length of the input audio
276
  nframe = int(np.ceil(input_len / args.win_inc)) # Calculate the number of frames based on window increment
277
+ padded_len = int(nframe * args.win_inc) # Calculate the padded length to fit the model
278
  padding_len = padded_len - input_len # Determine how much padding is needed
279
 
280
  # Pad the input audio with the beginning of the input