MAZALA2024 committed
Commit c5224f7 · verified · 1 Parent(s): 0741c2d

Update vc_infer_pipeline.py

Files changed (1)
  1. vc_infer_pipeline.py +11 -11
vc_infer_pipeline.py CHANGED
@@ -169,7 +169,7 @@ class VC(object):
         model,
         net_g,
         sid,
-        audio0,
+        audio,
         pitch,
         pitchf,
         times,
@@ -179,7 +179,8 @@ class VC(object):
         version,
         protect,
     ):
-        feats = torch.from_numpy(audio0)
+        logger.info(f"VC input shape: {audio.shape}")
+        feats = torch.from_numpy(audio)
         if self.is_half:
             feats = feats.half()
         else:
@@ -189,7 +190,7 @@ class VC(object):
         assert feats.dim() == 1, feats.dim()
         feats = feats.view(1, -1)
         padding_mask = torch.BoolTensor(feats.shape).to(self.device).fill_(False)
-
+
         inputs = {
             "source": feats.to(self.device),
             "padding_mask": padding_mask,
@@ -209,32 +210,30 @@ class VC(object):
             npy = feats[0].cpu().numpy()
             if self.is_half:
                 npy = npy.astype("float32")
-
+
             score, ix = index.search(npy, k=8)
             weight = np.square(1 / score)
             weight /= weight.sum(axis=1, keepdims=True)
             npy = np.sum(big_npy[ix] * np.expand_dims(weight, axis=2), axis=1)
-
+
             if self.is_half:
                 npy = npy.astype("float16")
             feats = (
                 torch.from_numpy(npy).unsqueeze(0).to(self.device) * index_rate
                 + (1 - index_rate) * feats
             )
-
+
         feats = F.interpolate(feats.permute(0, 2, 1), scale_factor=2).permute(0, 2, 1)
         if protect < 0.5 and pitch is not None and pitchf is not None:
-            feats0 = F.interpolate(feats0.permute(0, 2, 1), scale_factor=2).permute(
-                0, 2, 1
-            )
+            feats0 = F.interpolate(feats0.permute(0, 2, 1), scale_factor=2).permute(0, 2, 1)
         t1 = ttime()
-        p_len = audio0.shape[0] // self.window
+        p_len = audio.shape[0] // self.window
         if feats.shape[1] < p_len:
             p_len = feats.shape[1]
         if pitch is not None and pitchf is not None:
             pitch = pitch[:, :p_len]
             pitchf = pitchf[:, :p_len]
-
+
         if protect < 0.5 and pitch is not None and pitchf is not None:
             pitchff = pitchf.clone()
             pitchff[pitchf > 0] = 1
@@ -261,6 +260,7 @@ class VC(object):
         t2 = ttime()
         times[0] += t1 - t0
         times[2] += t2 - t1
+        logger.info(f"VC output shape: {audio1.shape}")
         return audio1
 
     def pipeline(
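
For context on the hunk above: the surrounding code is the pipeline's optional feature-retrieval step, where each frame of `feats` is matched against the k=8 nearest entries of a FAISS index built over the training features in `big_npy`, the neighbours are averaged with inverse-squared-distance weights, and the result is mixed back into the live features by `index_rate`. Below is a minimal standalone sketch of that weighting, not the project's own code: the function name `blend_with_index` is made up here, and it assumes `feats` is a float32 `(T, D)` array laid out like `big_npy`.

```python
import numpy as np
import faiss  # same library behind the pipeline's `index.search(npy, k=8)` call


def blend_with_index(feats, index, big_npy, index_rate):
    """Sketch of the retrieval/blend step seen in the diff context above.

    feats:      (T, D) float32 frame features
    index:      a FAISS index built over big_npy
    big_npy:    (N, D) float32 training features
    index_rate: 0.0 keeps feats untouched, 1.0 uses purely retrieved features
    """
    # nearest-neighbour search: distances and row ids for every frame
    score, ix = index.search(feats, k=8)
    # inverse-squared-distance weights, normalised per frame
    weight = np.square(1 / score)
    weight /= weight.sum(axis=1, keepdims=True)
    # weighted average of the retrieved training features
    retrieved = np.sum(big_npy[ix] * np.expand_dims(weight, axis=2), axis=1)
    # linear blend controlled by index_rate
    return index_rate * retrieved + (1 - index_rate) * feats
```

The condition that decides whether this lookup runs at all sits just above the hunk and is not shown in the diff, so the sketch omits those guards.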