Update vc_infer_pipeline.py
Browse files- vc_infer_pipeline.py +11 -39
vc_infer_pipeline.py
CHANGED
@@ -205,6 +205,9 @@ class VC(object):
|
|
205 |
feats = model.final_proj(logits[0]) if version == "v1" else logits[0]
|
206 |
if protect < 0.5 and pitch is not None and pitchf is not None:
|
207 |
feats0 = feats.clone()
|
|
|
|
|
|
|
208 |
if (
|
209 |
index is not None
|
210 |
and big_npy is not None
|
@@ -265,7 +268,7 @@ class VC(object):
|
|
265 |
times[2] += t2 - t1
|
266 |
logger.info(f"VC output shape: {audio1.shape}")
|
267 |
return audio1
|
268 |
-
|
269 |
def pipeline(
|
270 |
self,
|
271 |
model,
|
@@ -318,6 +321,11 @@ class VC(object):
|
|
318 |
)[0][0]
|
319 |
)
|
320 |
logger.info(f"Number of opt_ts: {len(opt_ts)}")
|
|
|
|
|
|
|
|
|
|
|
321 |
s = 0
|
322 |
audio_opt = []
|
323 |
t = None
|
@@ -392,45 +400,9 @@ class VC(object):
|
|
392 |
logger.info(f"Segment {i+1} shape: {segment.shape}")
|
393 |
audio_opt.append(segment)
|
394 |
s = t
|
395 |
-
if t is not None:
|
396 |
-
logger.info("Processing final segment")
|
397 |
-
if if_f0 == 1:
|
398 |
-
audio_opt.append(
|
399 |
-
self.vc(
|
400 |
-
model,
|
401 |
-
net_g,
|
402 |
-
sid,
|
403 |
-
audio_pad[t:],
|
404 |
-
pitch[:, t // self.window :],
|
405 |
-
pitchf[:, t // self.window :],
|
406 |
-
times,
|
407 |
-
index,
|
408 |
-
big_npy,
|
409 |
-
index_rate,
|
410 |
-
version,
|
411 |
-
protect,
|
412 |
-
)[self.t_pad_tgt : -self.t_pad_tgt]
|
413 |
-
)
|
414 |
-
else:
|
415 |
-
audio_opt.append(
|
416 |
-
self.vc(
|
417 |
-
model,
|
418 |
-
net_g,
|
419 |
-
sid,
|
420 |
-
audio_pad[t:],
|
421 |
-
None,
|
422 |
-
None,
|
423 |
-
times,
|
424 |
-
index,
|
425 |
-
big_npy,
|
426 |
-
index_rate,
|
427 |
-
version,
|
428 |
-
protect,
|
429 |
-
)[self.t_pad_tgt : -self.t_pad_tgt]
|
430 |
-
)
|
431 |
-
|
432 |
-
logger.info(f"Number of audio segments: {len(audio_opt)}")
|
433 |
|
|
|
|
|
434 |
if not audio_opt:
|
435 |
raise ValueError("No audio segments were generated")
|
436 |
|
|
|
205 |
feats = model.final_proj(logits[0]) if version == "v1" else logits[0]
|
206 |
if protect < 0.5 and pitch is not None and pitchf is not None:
|
207 |
feats0 = feats.clone()
|
208 |
+
|
209 |
+
logger.info(f"Feats shape after processing: {feats.shape}")
|
210 |
+
|
211 |
if (
|
212 |
index is not None
|
213 |
and big_npy is not None
|
|
|
268 |
times[2] += t2 - t1
|
269 |
logger.info(f"VC output shape: {audio1.shape}")
|
270 |
return audio1
|
271 |
+
|
272 |
def pipeline(
|
273 |
self,
|
274 |
model,
|
|
|
321 |
)[0][0]
|
322 |
)
|
323 |
logger.info(f"Number of opt_ts: {len(opt_ts)}")
|
324 |
+
|
325 |
+
if len(opt_ts) == 0:
|
326 |
+
logger.info("No optimal time steps found. Processing entire audio.")
|
327 |
+
opt_ts = [audio.shape[0]]
|
328 |
+
|
329 |
s = 0
|
330 |
audio_opt = []
|
331 |
t = None
|
|
|
400 |
logger.info(f"Segment {i+1} shape: {segment.shape}")
|
401 |
audio_opt.append(segment)
|
402 |
s = t
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
403 |
|
404 |
+
logger.info(f"Number of audio segments: {len(audio_opt)}")
|
405 |
+
|
406 |
if not audio_opt:
|
407 |
raise ValueError("No audio segments were generated")
|
408 |
|