MAZALA2024 committed on
Commit
8077420
·
verified ·
1 Parent(s): b1745b8

Update vc_infer_pipeline.py

Browse files
Files changed (1) hide show
  1. vc_infer_pipeline.py +11 -39
vc_infer_pipeline.py CHANGED
@@ -205,6 +205,9 @@ class VC(object):
205
  feats = model.final_proj(logits[0]) if version == "v1" else logits[0]
206
  if protect < 0.5 and pitch is not None and pitchf is not None:
207
  feats0 = feats.clone()
 
 
 
208
  if (
209
  index is not None
210
  and big_npy is not None
@@ -265,7 +268,7 @@ class VC(object):
265
  times[2] += t2 - t1
266
  logger.info(f"VC output shape: {audio1.shape}")
267
  return audio1
268
-
269
  def pipeline(
270
  self,
271
  model,
@@ -318,6 +321,11 @@ class VC(object):
318
  )[0][0]
319
  )
320
  logger.info(f"Number of opt_ts: {len(opt_ts)}")
 
 
 
 
 
321
  s = 0
322
  audio_opt = []
323
  t = None
@@ -392,45 +400,9 @@ class VC(object):
392
  logger.info(f"Segment {i+1} shape: {segment.shape}")
393
  audio_opt.append(segment)
394
  s = t
395
- if t is not None:
396
- logger.info("Processing final segment")
397
- if if_f0 == 1:
398
- audio_opt.append(
399
- self.vc(
400
- model,
401
- net_g,
402
- sid,
403
- audio_pad[t:],
404
- pitch[:, t // self.window :],
405
- pitchf[:, t // self.window :],
406
- times,
407
- index,
408
- big_npy,
409
- index_rate,
410
- version,
411
- protect,
412
- )[self.t_pad_tgt : -self.t_pad_tgt]
413
- )
414
- else:
415
- audio_opt.append(
416
- self.vc(
417
- model,
418
- net_g,
419
- sid,
420
- audio_pad[t:],
421
- None,
422
- None,
423
- times,
424
- index,
425
- big_npy,
426
- index_rate,
427
- version,
428
- protect,
429
- )[self.t_pad_tgt : -self.t_pad_tgt]
430
- )
431
-
432
- logger.info(f"Number of audio segments: {len(audio_opt)}")
433
 
 
 
434
  if not audio_opt:
435
  raise ValueError("No audio segments were generated")
436
 
 
205
  feats = model.final_proj(logits[0]) if version == "v1" else logits[0]
206
  if protect < 0.5 and pitch is not None and pitchf is not None:
207
  feats0 = feats.clone()
208
+
209
+ logger.info(f"Feats shape after processing: {feats.shape}")
210
+
211
  if (
212
  index is not None
213
  and big_npy is not None
 
268
  times[2] += t2 - t1
269
  logger.info(f"VC output shape: {audio1.shape}")
270
  return audio1
271
+
272
  def pipeline(
273
  self,
274
  model,
 
321
  )[0][0]
322
  )
323
  logger.info(f"Number of opt_ts: {len(opt_ts)}")
324
+
325
+ if len(opt_ts) == 0:
326
+ logger.info("No optimal time steps found. Processing entire audio.")
327
+ opt_ts = [audio.shape[0]]
328
+
329
  s = 0
330
  audio_opt = []
331
  t = None
 
400
  logger.info(f"Segment {i+1} shape: {segment.shape}")
401
  audio_opt.append(segment)
402
  s = t
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
403
 
404
+ logger.info(f"Number of audio segments: {len(audio_opt)}")
405
+
406
  if not audio_opt:
407
  raise ValueError("No audio segments were generated")
408