MAZALA2024 committed (verified)
Commit 454fba8
1 Parent(s): c5224f7

Update vc_infer_pipeline.py

Files changed (1)
  1. vc_infer_pipeline.py +46 -38
vc_infer_pipeline.py CHANGED
@@ -284,6 +284,7 @@ class VC(object):
         protect,
         f0_file=None,
     ):
+        logger.info(f"Starting pipeline with audio shape: {audio.shape}")
         if (
             file_index != ""
             and os.path.exists(file_index)
@@ -313,6 +314,7 @@ class VC(object):
                         == np.abs(audio_sum[t - self.t_query : t + self.t_query]).min()
                     )[0][0]
                 )
+        logger.info(f"Number of opt_ts: {len(opt_ts)}")
         s = 0
         audio_opt = []
         t = None
@@ -350,17 +352,54 @@ class VC(object):
         pitchf = torch.tensor(pitchf, device=self.device).unsqueeze(0).float()
         t2 = ttime()
         times[1] += t2 - t1
-        for t in opt_ts:
+        for i, t in enumerate(opt_ts):
             t = t // self.window * self.window
+            logger.info(f"Processing segment {i+1}/{len(opt_ts)}")
+            if if_f0 == 1:
+                segment = self.vc(
+                    model,
+                    net_g,
+                    sid,
+                    audio_pad[s : t + self.t_pad2 + self.window],
+                    pitch[:, s // self.window : (t + self.t_pad2) // self.window],
+                    pitchf[:, s // self.window : (t + self.t_pad2) // self.window],
+                    times,
+                    index,
+                    big_npy,
+                    index_rate,
+                    version,
+                    protect,
+                )[self.t_pad_tgt : -self.t_pad_tgt]
+            else:
+                segment = self.vc(
+                    model,
+                    net_g,
+                    sid,
+                    audio_pad[s : t + self.t_pad2 + self.window],
+                    None,
+                    None,
+                    times,
+                    index,
+                    big_npy,
+                    index_rate,
+                    version,
+                    protect,
+                )[self.t_pad_tgt : -self.t_pad_tgt]
+
+            logger.info(f"Segment {i+1} shape: {segment.shape}")
+            audio_opt.append(segment)
+            s = t
+        if t is not None:
+            logger.info("Processing final segment")
             if if_f0 == 1:
                 audio_opt.append(
                     self.vc(
                         model,
                         net_g,
                         sid,
-                        audio_pad[s : t + self.t_pad2 + self.window],
-                        pitch[:, s // self.window : (t + self.t_pad2) // self.window],
-                        pitchf[:, s // self.window : (t + self.t_pad2) // self.window],
+                        audio_pad[t:],
+                        pitch[:, t // self.window :],
+                        pitchf[:, t // self.window :],
                         times,
                         index,
                         big_npy,
@@ -375,7 +414,7 @@ class VC(object):
                         model,
                         net_g,
                         sid,
-                        audio_pad[s : t + self.t_pad2 + self.window],
+                        audio_pad[t:],
                         None,
                         None,
                         times,
@@ -386,39 +425,8 @@ class VC(object):
                         protect,
                     )[self.t_pad_tgt : -self.t_pad_tgt]
                 )
-            s = t
-        if t is not None:
-            if if_f0 == 1:
-                audio_piece = self.process_batch(
-                    model,
-                    net_g,
-                    sid,
-                    [audio_pad[t:]],
-                    [pitch[:, t // self.window :]],
-                    [pitchf[:, t // self.window :]],
-                    times,
-                    index,
-                    big_npy,
-                    index_rate,
-                    version,
-                    protect,
-                )[0]
-            else:
-                audio_piece = self.process_batch(
-                    model,
-                    net_g,
-                    sid,
-                    [audio_pad[t:]],
-                    None,
-                    None,
-                    times,
-                    index,
-                    big_npy,
-                    index_rate,
-                    version,
-                    protect,
-                )[0]
-            audio_opt.append(audio_piece[self.t_pad_tgt : -self.t_pad_tgt])
+
+        logger.info(f"Number of audio segments: {len(audio_opt)}")

         if not audio_opt:
             raise ValueError("No audio segments were generated")
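
For orientation, the loop this commit reworks cuts the padded input at the low-energy points collected in opt_ts, snaps each cut to a multiple of self.window, converts each overlapping slice with self.vc, and trims self.t_pad_tgt samples of padding from both ends of every converted piece before the pieces are concatenated. The standalone Python sketch below reproduces only that slicing arithmetic; the values of window, t_pad2 and t_pad_tgt, the dummy audio, the cut points and fake_convert are invented placeholders for illustration, not values or functions from this repository.

import numpy as np

# Hypothetical sizes for illustration only; the real values come from the VC config.
window = 160        # frame hop in samples
t_pad2 = 16000      # extra samples appended to every slice as overlap padding
t_pad_tgt = 4000    # padding trimmed from each converted piece

def fake_convert(x: np.ndarray) -> np.ndarray:
    """Stand-in for self.vc(...); here it simply returns the slice unchanged."""
    return x

audio_pad = np.zeros(16000 * 10, dtype=np.float32)  # 10 s of dummy padded audio
opt_ts = [16000 * 3, 16000 * 6]                     # pretend low-energy cut points

audio_opt = []
s = 0
t = None
for i, t in enumerate(opt_ts):
    t = t // window * window                       # snap the cut to a frame boundary
    piece = fake_convert(audio_pad[s : t + t_pad2 + window])
    audio_opt.append(piece[t_pad_tgt:-t_pad_tgt])  # drop the padded edges
    s = t
if t is not None:                                  # final segment runs to the end
    tail = fake_convert(audio_pad[t:])
    audio_opt.append(tail[t_pad_tgt:-t_pad_tgt])

out = np.concatenate(audio_opt)
print(len(audio_opt), out.shape)

Because every converted piece has the same amount of padding trimmed from both ends, concatenating audio_opt keeps the output aligned with the original timeline, which is what makes the final empty-audio_opt check a meaningful guard.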