Update vc_infer_pipeline.py
Browse files- vc_infer_pipeline.py +126 -2
vc_infer_pipeline.py
CHANGED
@@ -263,7 +263,131 @@ class VC(object):
|
|
263 |
times[2] += t2 - t1
|
264 |
return audio1
|
265 |
|
266 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
267 |
if if_f0 == 1:
|
268 |
audio_piece = self.process_batch(
|
269 |
model,
|
@@ -308,7 +432,7 @@ class VC(object):
|
|
308 |
audio_opt = (audio_opt * max_int16).astype(np.int16)
|
309 |
|
310 |
return audio_opt
|
311 |
-
|
312 |
def parallel_pipeline(self, tasks):
|
313 |
with ThreadPoolExecutor() as executor:
|
314 |
futures = [executor.submit(self.pipeline, *task) for task in tasks]
|
|
|
263 |
times[2] += t2 - t1
|
264 |
return audio1
|
265 |
|
266 |
+
def pipeline(
|
267 |
+
self,
|
268 |
+
model,
|
269 |
+
net_g,
|
270 |
+
sid,
|
271 |
+
audio,
|
272 |
+
input_audio_path,
|
273 |
+
times,
|
274 |
+
f0_up_key,
|
275 |
+
f0_method,
|
276 |
+
file_index,
|
277 |
+
index_rate,
|
278 |
+
if_f0,
|
279 |
+
filter_radius,
|
280 |
+
tgt_sr,
|
281 |
+
resample_sr,
|
282 |
+
rms_mix_rate,
|
283 |
+
version,
|
284 |
+
protect,
|
285 |
+
f0_file=None,
|
286 |
+
):
|
287 |
+
if (
|
288 |
+
file_index != ""
|
289 |
+
and os.path.exists(file_index)
|
290 |
+
and index_rate != 0
|
291 |
+
):
|
292 |
+
try:
|
293 |
+
index = faiss.read_index(file_index)
|
294 |
+
big_npy = index.reconstruct_n(0, index.ntotal)
|
295 |
+
except:
|
296 |
+
traceback.print_exc()
|
297 |
+
index = big_npy = None
|
298 |
+
else:
|
299 |
+
index = big_npy = None
|
300 |
+
audio = signal.filtfilt(bh, ah, audio)
|
301 |
+
audio_pad = np.pad(audio, (self.window // 2, self.window // 2), mode="reflect")
|
302 |
+
opt_ts = []
|
303 |
+
if audio_pad.shape[0] > self.t_max:
|
304 |
+
audio_sum = np.zeros_like(audio)
|
305 |
+
for i in range(self.window):
|
306 |
+
audio_sum += audio_pad[i : i - self.window]
|
307 |
+
for t in range(self.t_center, audio.shape[0], self.t_center):
|
308 |
+
opt_ts.append(
|
309 |
+
t
|
310 |
+
- self.t_query
|
311 |
+
+ np.where(
|
312 |
+
np.abs(audio_sum[t - self.t_query : t + self.t_query])
|
313 |
+
== np.abs(audio_sum[t - self.t_query : t + self.t_query]).min()
|
314 |
+
)[0][0]
|
315 |
+
)
|
316 |
+
s = 0
|
317 |
+
audio_opt = []
|
318 |
+
t = None
|
319 |
+
t1 = ttime()
|
320 |
+
audio_pad = np.pad(audio, (self.t_pad, self.t_pad), mode="reflect")
|
321 |
+
p_len = audio_pad.shape[0] // self.window
|
322 |
+
inp_f0 = None
|
323 |
+
if hasattr(f0_file, "name"):
|
324 |
+
try:
|
325 |
+
with open(f0_file.name, "r") as f:
|
326 |
+
lines = f.read().strip("\n").split("\n")
|
327 |
+
inp_f0 = []
|
328 |
+
for line in lines:
|
329 |
+
inp_f0.append([float(i) for i in line.split(",")])
|
330 |
+
inp_f0 = np.array(inp_f0, dtype="float32")
|
331 |
+
except:
|
332 |
+
traceback.print_exc()
|
333 |
+
sid = torch.tensor(sid, device=self.device).unsqueeze(0).long()
|
334 |
+
pitch, pitchf = None, None
|
335 |
+
if if_f0 == 1:
|
336 |
+
pitch, pitchf = self.get_f0(
|
337 |
+
input_audio_path,
|
338 |
+
audio_pad,
|
339 |
+
p_len,
|
340 |
+
f0_up_key,
|
341 |
+
f0_method,
|
342 |
+
filter_radius,
|
343 |
+
inp_f0,
|
344 |
+
)
|
345 |
+
pitch = pitch[:p_len]
|
346 |
+
pitchf = pitchf[:p_len]
|
347 |
+
if self.device == "mps":
|
348 |
+
pitchf = pitchf.astype(np.float32)
|
349 |
+
pitch = torch.tensor(pitch, device=self.device).unsqueeze(0).long()
|
350 |
+
pitchf = torch.tensor(pitchf, device=self.device).unsqueeze(0).float()
|
351 |
+
t2 = ttime()
|
352 |
+
times[1] += t2 - t1
|
353 |
+
for t in opt_ts:
|
354 |
+
t = t // self.window * self.window
|
355 |
+
if if_f0 == 1:
|
356 |
+
audio_opt.append(
|
357 |
+
self.vc(
|
358 |
+
model,
|
359 |
+
net_g,
|
360 |
+
sid,
|
361 |
+
audio_pad[s : t + self.t_pad2 + self.window],
|
362 |
+
pitch[:, s // self.window : (t + self.t_pad2) // self.window],
|
363 |
+
pitchf[:, s // self.window : (t + self.t_pad2) // self.window],
|
364 |
+
times,
|
365 |
+
index,
|
366 |
+
big_npy,
|
367 |
+
index_rate,
|
368 |
+
version,
|
369 |
+
protect,
|
370 |
+
)[self.t_pad_tgt : -self.t_pad_tgt]
|
371 |
+
)
|
372 |
+
else:
|
373 |
+
audio_opt.append(
|
374 |
+
self.vc(
|
375 |
+
model,
|
376 |
+
net_g,
|
377 |
+
sid,
|
378 |
+
audio_pad[s : t + self.t_pad2 + self.window],
|
379 |
+
None,
|
380 |
+
None,
|
381 |
+
times,
|
382 |
+
index,
|
383 |
+
big_npy,
|
384 |
+
index_rate,
|
385 |
+
version,
|
386 |
+
protect,
|
387 |
+
)[self.t_pad_tgt : -self.t_pad_tgt]
|
388 |
+
)
|
389 |
+
s = t
|
390 |
+
if t is not None:
|
391 |
if if_f0 == 1:
|
392 |
audio_piece = self.process_batch(
|
393 |
model,
|
|
|
432 |
audio_opt = (audio_opt * max_int16).astype(np.int16)
|
433 |
|
434 |
return audio_opt
|
435 |
+
|
436 |
def parallel_pipeline(self, tasks):
|
437 |
with ThreadPoolExecutor() as executor:
|
438 |
futures = [executor.submit(self.pipeline, *task) for task in tasks]
|