feat: update infer

Changed files:
- config.py (+0 −2)
- vc_infer_pipeline.py (+13 −11)
config.py

@@ -71,8 +71,6 @@ class Config:
                 / 1024
                 + 0.4
             )
-            if self.gpu_mem <= 4:
-                nope = None
         elif torch.backends.mps.is_available():
             print("没有发现支持的N卡, 使用MPS进行推理")
             self.device = "mps"

The two removed lines were a leftover no-op in the low-VRAM branch. (The print message translates to: "No supported NVIDIA GPU found, using MPS for inference.")
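For context on the surrounding hunk: the arithmetic above estimates the GPU's VRAM in GiB (the `+ 0.4` nudges `int()` toward rounding up), and the `elif` falls back to Apple's MPS backend when no CUDA device is found. A minimal, hypothetical sketch of that selection logic, assuming only `torch`; the function name and the GPU index `i_device` are invented here for illustration, not taken from the repo:

import torch

def pick_device():
    """Hypothetical sketch of the CUDA -> MPS -> CPU fallback in Config."""
    if torch.cuda.is_available():
        i_device = 0  # assumed default GPU index
        total = torch.cuda.get_device_properties(i_device).total_memory
        # Bytes -> GiB; the + 0.4 biases int() upward, so a card reporting
        # 7.7 GiB is treated as 8 GiB.
        gpu_mem = int(total / 1024 / 1024 / 1024 + 0.4)
        return "cuda:%d" % i_device, gpu_mem
    elif torch.backends.mps.is_available():
        # Same branch as the diff: no supported NVIDIA GPU, use MPS.
        return "mps", None
    else:
        return "cpu", None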
vc_infer_pipeline.py

@@ -162,7 +162,7 @@ class VC(object):
         big_npy,
         index_rate,
         version,
-        protect
+        protect,
     ):  # ,file_index,file_big_npy
         feats = torch.from_numpy(audio0)
         if self.is_half:
@@ -184,8 +184,8 @@ class VC(object):
         with torch.no_grad():
             logits = model.extract_features(**inputs)
             feats = model.final_proj(logits[0]) if version == "v1" else logits[0]
-        if
-        feats0=feats.clone()
+        if protect < 0.5:
+            feats0 = feats.clone()
         if (
             isinstance(index, type(None)) == False
             and isinstance(big_npy, type(None)) == False
@@ -211,8 +211,10 @@ class VC(object):
             )

         feats = F.interpolate(feats.permute(0, 2, 1), scale_factor=2).permute(0, 2, 1)
-        if
-        feats0 = F.interpolate(feats0.permute(0, 2, 1), scale_factor=2).permute(
+        if protect < 0.5:
+            feats0 = F.interpolate(feats0.permute(0, 2, 1), scale_factor=2).permute(
+                0, 2, 1
+            )
         t1 = ttime()
         p_len = audio0.shape[0] // self.window
         if feats.shape[1] < p_len:
@@ -221,13 +223,13 @@ class VC(object):
             pitch = pitch[:, :p_len]
             pitchf = pitchf[:, :p_len]

-        if
+        if protect < 0.5:
             pitchff = pitchf.clone()
             pitchff[pitchf > 0] = 1
             pitchff[pitchf < 1] = protect
             pitchff = pitchff.unsqueeze(-1)
             feats = feats * pitchff + feats0 * (1 - pitchff)
-        feats=feats.to(feats0.dtype)
+            feats = feats.to(feats0.dtype)
         p_len = torch.tensor([p_len], device=self.device).long()
         with torch.no_grad():
             if pitch != None and pitchf != None:
@@ -356,7 +358,7 @@ class VC(object):
                         big_npy,
                         index_rate,
                         version,
-                        protect
+                        protect,
                     )[self.t_pad_tgt : -self.t_pad_tgt]
                 )
             else:
@@ -373,7 +375,7 @@ class VC(object):
                         big_npy,
                         index_rate,
                         version,
-                        protect
+                        protect,
                     )[self.t_pad_tgt : -self.t_pad_tgt]
                 )
             s = t
@@ -391,7 +393,7 @@ class VC(object):
                     big_npy,
                     index_rate,
                     version,
-                    protect
+                    protect,
                 )[self.t_pad_tgt : -self.t_pad_tgt]
             )
         else:
@@ -408,7 +410,7 @@ class VC(object):
                     big_npy,
                     index_rate,
                     version,
-                    protect
+                    protect,
                 )[self.t_pad_tgt : -self.t_pad_tgt]
             )
         audio_opt = np.concatenate(audio_opt)
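Net effect in vc_infer_pipeline.py: `protect` gains a trailing comma in the `VC.vc()` signature and at its four call sites, and the feature-protection work (cloning `feats`, upsampling `feats0`, blending via `pitchff`) is gated behind `if protect < 0.5:` so it is skipped entirely when protection is disabled. Below is a self-contained sketch of what the guarded blend computes; the tensor names mirror the diff, but the function wrapper and toy shapes are made up for illustration:

import torch

def protect_blend(feats, feats0, pitchf, protect=0.33):
    """Blend index-retrieved features back toward the originals on unvoiced frames.

    feats:  (1, T, C) features after index retrieval / mixing
    feats0: (1, T, C) untouched copy cloned before retrieval
    pitchf: (1, T)    per-frame f0; 0 on unvoiced frames
    """
    if protect >= 0.5:
        # Protection disabled: use retrieved features as-is.
        return feats
    pitchff = pitchf.clone()
    pitchff[pitchf > 0] = 1          # voiced frames keep retrieved feats
    pitchff[pitchf < 1] = protect    # unvoiced frames lean back toward feats0
    pitchff = pitchff.unsqueeze(-1)  # (1, T, 1) so it broadcasts over channels
    out = feats * pitchff + feats0 * (1 - pitchff)
    return out.to(feats0.dtype)

# Toy usage with made-up values:
T, C = 8, 4
feats = torch.randn(1, T, C)
feats0 = torch.randn(1, T, C)
pitchf = torch.tensor([[0.0, 110.0, 112.0, 0.0, 0.0, 220.0, 221.0, 0.0]])
blended = protect_blend(feats, feats0, pitchf, protect=0.33)

Voiced frames (pitchf > 0) keep the retrieved features, while unvoiced frames are pulled back toward the pre-retrieval ones; the idea is to keep voiceless consonants and breaths from being smeared by index retrieval at low `protect` values.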
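On the `F.interpolate` hunk (@@ -211,8 +211,10 @@): both calls double the feature sequence along the time axis, presumably so the content frames line up with the synthesizer's frame rate; the update merely wraps the `feats0` copy in the same `protect < 0.5` guard so the second upsample is skipped when protection is off. A tiny sketch of the permute/interpolate/permute sandwich, with made-up shapes:

import torch
import torch.nn.functional as F

# feats is (batch, T, C); F.interpolate scales the last axis of (batch, C, L),
# hence the permute before and after.
feats = torch.arange(12.0).reshape(1, 3, 4)  # (1, T=3, C=4)
up = F.interpolate(feats.permute(0, 2, 1), scale_factor=2).permute(0, 2, 1)
print(up.shape)  # torch.Size([1, 6, 4]); default mode "nearest" repeats each frame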