File size: 5,393 Bytes
96ea114 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 |
from multiprocessing import cpu_count
from pathlib import Path
import torch
from fairseq import checkpoint_utils
from scipy.io import wavfile
from infer_pack.models import (
SynthesizerTrnMs256NSFsid,
SynthesizerTrnMs256NSFsid_nono,
SynthesizerTrnMs768NSFsid,
SynthesizerTrnMs768NSFsid_nono,
)
from my_utils import load_audio
from vc_infer_pipeline import VC
BASE_DIR = Path(__file__).resolve().parent.parent
class Config:
def __init__(self, device, is_half):
self.device = device
self.is_half = is_half
self.n_cpu = 0
self.gpu_name = None
self.gpu_mem = None
self.x_pad, self.x_query, self.x_center, self.x_max = self.device_config()
def device_config(self) -> tuple:
if torch.cuda.is_available():
i_device = int(self.device.split(":")[-1])
self.gpu_name = torch.cuda.get_device_name(i_device)
if (
("16" in self.gpu_name and "V100" not in self.gpu_name.upper())
or "P40" in self.gpu_name.upper()
or "1060" in self.gpu_name
or "1070" in self.gpu_name
or "1080" in self.gpu_name
):
print("16 series/10 series P40 forced single precision")
self.is_half = False
for config_file in ["32k.json", "40k.json", "48k.json"]:
with open(BASE_DIR / "src" / "configs" / config_file, "r") as f:
strr = f.read().replace("true", "false")
with open(BASE_DIR / "src" / "configs" / config_file, "w") as f:
f.write(strr)
with open(BASE_DIR / "src" / "trainset_preprocess_pipeline_print.py", "r") as f:
strr = f.read().replace("3.7", "3.0")
with open(BASE_DIR / "src" / "trainset_preprocess_pipeline_print.py", "w") as f:
f.write(strr)
else:
self.gpu_name = None
self.gpu_mem = int(
torch.cuda.get_device_properties(i_device).total_memory
/ 1024
/ 1024
/ 1024
+ 0.4
)
if self.gpu_mem <= 4:
with open(BASE_DIR / "src" / "trainset_preprocess_pipeline_print.py", "r") as f:
strr = f.read().replace("3.7", "3.0")
with open(BASE_DIR / "src" / "trainset_preprocess_pipeline_print.py", "w") as f:
f.write(strr)
elif torch.backends.mps.is_available():
print("No supported N-card found, use MPS for inference")
self.device = "mps"
else:
print("No supported N-card found, use CPU for inference")
self.device = "cpu"
self.is_half = True
if self.n_cpu == 0:
self.n_cpu = cpu_count()
if self.is_half:
# 6G memory config
x_pad = 3
x_query = 10
x_center = 60
x_max = 65
else:
# 5G memory config
x_pad = 1
x_query = 6
x_center = 38
x_max = 41
if self.gpu_mem != None and self.gpu_mem <= 4:
x_pad = 1
x_query = 5
x_center = 30
x_max = 32
return x_pad, x_query, x_center, x_max
def load_hubert(device, is_half, model_path):
models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task([model_path], suffix='', )
hubert = models[0]
hubert = hubert.to(device)
if is_half:
hubert = hubert.half()
else:
hubert = hubert.float()
hubert.eval()
return hubert
def get_vc(device, is_half, config, model_path):
cpt = torch.load(model_path, map_location='cpu')
if "config" not in cpt or "weight" not in cpt:
raise ValueError(f'Incorrect format for {model_path}. Use a voice model trained using RVC v2 instead.')
tgt_sr = cpt["config"][-1]
cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]
if_f0 = cpt.get("f0", 1)
version = cpt.get("version", "v1")
if version == "v1":
if if_f0 == 1:
net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=is_half)
else:
net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
elif version == "v2":
if if_f0 == 1:
net_g = SynthesizerTrnMs768NSFsid(*cpt["config"], is_half=is_half)
else:
net_g = SynthesizerTrnMs768NSFsid_nono(*cpt["config"])
del net_g.enc_q
print(net_g.load_state_dict(cpt["weight"], strict=False))
net_g.eval().to(device)
if is_half:
net_g = net_g.half()
else:
net_g = net_g.float()
vc = VC(tgt_sr, config)
return cpt, version, net_g, tgt_sr, vc
def rvc_infer(index_path, index_rate, input_path, output_path, pitch_change, f0_method, cpt, version, net_g, filter_radius, tgt_sr, rms_mix_rate, protect, crepe_hop_length, vc, hubert_model):
audio = load_audio(input_path, 16000)
times = [0, 0, 0]
if_f0 = cpt.get('f0', 1)
audio_opt = vc.pipeline(hubert_model, net_g, 0, audio, input_path, times, pitch_change, f0_method, index_path, index_rate, if_f0, filter_radius, tgt_sr, 0, rms_mix_rate, version, protect, crepe_hop_length)
wavfile.write(output_path, tgt_sr, audio_opt)
|