MAZALA2024 committed on
Commit
f72b616
·
verified ·
1 Parent(s): d8978c2

Update vc_infer_pipeline.py

Browse files
Files changed (1) hide show
  1. vc_infer_pipeline.py +126 -2
vc_infer_pipeline.py CHANGED
@@ -263,7 +263,131 @@ class VC(object):
263
  times[2] += t2 - t1
264
  return audio1
265
 
266
- if t is not None:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
267
  if if_f0 == 1:
268
  audio_piece = self.process_batch(
269
  model,
@@ -308,7 +432,7 @@ class VC(object):
308
  audio_opt = (audio_opt * max_int16).astype(np.int16)
309
 
310
  return audio_opt
311
-
312
  def parallel_pipeline(self, tasks):
313
  with ThreadPoolExecutor() as executor:
314
  futures = [executor.submit(self.pipeline, *task) for task in tasks]
 
263
  times[2] += t2 - t1
264
  return audio1
265
 
266
+ def pipeline(
267
+ self,
268
+ model,
269
+ net_g,
270
+ sid,
271
+ audio,
272
+ input_audio_path,
273
+ times,
274
+ f0_up_key,
275
+ f0_method,
276
+ file_index,
277
+ index_rate,
278
+ if_f0,
279
+ filter_radius,
280
+ tgt_sr,
281
+ resample_sr,
282
+ rms_mix_rate,
283
+ version,
284
+ protect,
285
+ f0_file=None,
286
+ ):
287
+ if (
288
+ file_index != ""
289
+ and os.path.exists(file_index)
290
+ and index_rate != 0
291
+ ):
292
+ try:
293
+ index = faiss.read_index(file_index)
294
+ big_npy = index.reconstruct_n(0, index.ntotal)
295
+ except:
296
+ traceback.print_exc()
297
+ index = big_npy = None
298
+ else:
299
+ index = big_npy = None
300
+ audio = signal.filtfilt(bh, ah, audio)
301
+ audio_pad = np.pad(audio, (self.window // 2, self.window // 2), mode="reflect")
302
+ opt_ts = []
303
+ if audio_pad.shape[0] > self.t_max:
304
+ audio_sum = np.zeros_like(audio)
305
+ for i in range(self.window):
306
+ audio_sum += audio_pad[i : i - self.window]
307
+ for t in range(self.t_center, audio.shape[0], self.t_center):
308
+ opt_ts.append(
309
+ t
310
+ - self.t_query
311
+ + np.where(
312
+ np.abs(audio_sum[t - self.t_query : t + self.t_query])
313
+ == np.abs(audio_sum[t - self.t_query : t + self.t_query]).min()
314
+ )[0][0]
315
+ )
316
+ s = 0
317
+ audio_opt = []
318
+ t = None
319
+ t1 = ttime()
320
+ audio_pad = np.pad(audio, (self.t_pad, self.t_pad), mode="reflect")
321
+ p_len = audio_pad.shape[0] // self.window
322
+ inp_f0 = None
323
+ if hasattr(f0_file, "name"):
324
+ try:
325
+ with open(f0_file.name, "r") as f:
326
+ lines = f.read().strip("\n").split("\n")
327
+ inp_f0 = []
328
+ for line in lines:
329
+ inp_f0.append([float(i) for i in line.split(",")])
330
+ inp_f0 = np.array(inp_f0, dtype="float32")
331
+ except:
332
+ traceback.print_exc()
333
+ sid = torch.tensor(sid, device=self.device).unsqueeze(0).long()
334
+ pitch, pitchf = None, None
335
+ if if_f0 == 1:
336
+ pitch, pitchf = self.get_f0(
337
+ input_audio_path,
338
+ audio_pad,
339
+ p_len,
340
+ f0_up_key,
341
+ f0_method,
342
+ filter_radius,
343
+ inp_f0,
344
+ )
345
+ pitch = pitch[:p_len]
346
+ pitchf = pitchf[:p_len]
347
+ if self.device == "mps":
348
+ pitchf = pitchf.astype(np.float32)
349
+ pitch = torch.tensor(pitch, device=self.device).unsqueeze(0).long()
350
+ pitchf = torch.tensor(pitchf, device=self.device).unsqueeze(0).float()
351
+ t2 = ttime()
352
+ times[1] += t2 - t1
353
+ for t in opt_ts:
354
+ t = t // self.window * self.window
355
+ if if_f0 == 1:
356
+ audio_opt.append(
357
+ self.vc(
358
+ model,
359
+ net_g,
360
+ sid,
361
+ audio_pad[s : t + self.t_pad2 + self.window],
362
+ pitch[:, s // self.window : (t + self.t_pad2) // self.window],
363
+ pitchf[:, s // self.window : (t + self.t_pad2) // self.window],
364
+ times,
365
+ index,
366
+ big_npy,
367
+ index_rate,
368
+ version,
369
+ protect,
370
+ )[self.t_pad_tgt : -self.t_pad_tgt]
371
+ )
372
+ else:
373
+ audio_opt.append(
374
+ self.vc(
375
+ model,
376
+ net_g,
377
+ sid,
378
+ audio_pad[s : t + self.t_pad2 + self.window],
379
+ None,
380
+ None,
381
+ times,
382
+ index,
383
+ big_npy,
384
+ index_rate,
385
+ version,
386
+ protect,
387
+ )[self.t_pad_tgt : -self.t_pad_tgt]
388
+ )
389
+ s = t
390
+ if t is not None:
391
  if if_f0 == 1:
392
  audio_piece = self.process_batch(
393
  model,
 
432
  audio_opt = (audio_opt * max_int16).astype(np.int16)
433
 
434
  return audio_opt
435
+
436
  def parallel_pipeline(self, tasks):
437
  with ThreadPoolExecutor() as executor:
438
  futures = [executor.submit(self.pipeline, *task) for task in tasks]