darksakura committed
Commit 5b74e25 · 1 Parent(s): 46bab9a
Upload 85 files
- inference/__pycache__/__init__.cpython-38.pyc +0 -0
- inference/__pycache__/infer_tool_webui.cpython-38.pyc +0 -0
- inference/__pycache__/slicer.cpython-38.pyc +0 -0
- inference/infer_tool.py +1 -0
- inference/infer_tool_webui.py +22 -12
- modules/F0Predictor/__pycache__/CrepeF0Predictor.cpython-38.pyc +0 -0
- modules/F0Predictor/__pycache__/F0Predictor.cpython-38.pyc +0 -0
- modules/F0Predictor/__pycache__/FCPEF0Predictor.cpython-38.pyc +0 -0
- modules/F0Predictor/__pycache__/HarvestF0Predictor.cpython-38.pyc +0 -0
- modules/F0Predictor/__pycache__/PMF0Predictor.cpython-38.pyc +0 -0
- modules/F0Predictor/__pycache__/RMVPEF0Predictor.cpython-38.pyc +0 -0
- modules/F0Predictor/__pycache__/__init__.cpython-38.pyc +0 -0
- modules/F0Predictor/__pycache__/crepe.cpython-38.pyc +0 -0
- modules/F0Predictor/fcpe/__pycache__/__init__.cpython-38.pyc +0 -0
- modules/F0Predictor/fcpe/__pycache__/model.cpython-38.pyc +0 -0
- modules/F0Predictor/fcpe/__pycache__/nvSTFT.cpython-38.pyc +0 -0
- modules/F0Predictor/fcpe/__pycache__/pcmer.cpython-38.pyc +0 -0
- modules/F0Predictor/fcpe/model.py +25 -3
- modules/F0Predictor/rmvpe/__pycache__/__init__.cpython-38.pyc +0 -0
- modules/F0Predictor/rmvpe/__pycache__/constants.cpython-38.pyc +0 -0
- modules/F0Predictor/rmvpe/__pycache__/deepunet.cpython-38.pyc +0 -0
- modules/F0Predictor/rmvpe/__pycache__/inference.cpython-38.pyc +0 -0
- modules/F0Predictor/rmvpe/__pycache__/model.cpython-38.pyc +0 -0
- modules/F0Predictor/rmvpe/__pycache__/seq.cpython-38.pyc +0 -0
- modules/F0Predictor/rmvpe/__pycache__/spec.cpython-38.pyc +0 -0
- modules/F0Predictor/rmvpe/__pycache__/utils.cpython-38.pyc +0 -0
- modules/__pycache__/DSConv.cpython-38.pyc +0 -0
- modules/__pycache__/__init__.cpython-38.pyc +0 -0
- modules/__pycache__/attentions.cpython-38.pyc +0 -0
- modules/__pycache__/commons.cpython-38.pyc +0 -0
- modules/__pycache__/enhancer.cpython-38.pyc +0 -0
- modules/__pycache__/losses.cpython-38.pyc +0 -0
- modules/__pycache__/mel_processing.cpython-38.pyc +0 -0
- modules/__pycache__/modules.cpython-38.pyc +0 -0
- pretrain/meta.py +8 -0
- train_diff.py +4 -3
inference/__pycache__/__init__.cpython-38.pyc
CHANGED
Binary files a/inference/__pycache__/__init__.cpython-38.pyc and b/inference/__pycache__/__init__.cpython-38.pyc differ

inference/__pycache__/infer_tool_webui.cpython-38.pyc
CHANGED
Binary files a/inference/__pycache__/infer_tool_webui.cpython-38.pyc and b/inference/__pycache__/infer_tool_webui.cpython-38.pyc differ

inference/__pycache__/slicer.cpython-38.pyc
CHANGED
Binary files a/inference/__pycache__/slicer.cpython-38.pyc and b/inference/__pycache__/slicer.cpython-38.pyc differ
inference/infer_tool.py
CHANGED
@@ -267,6 +267,7 @@ class Svc(object):
             second_encoding = False,
             loudness_envelope_adjustment = 1
             ):
+        torchaudio.set_audio_backend("soundfile")
         wav, sr = torchaudio.load(raw_path)
         if not hasattr(self,"audio_resample_transform") or self.audio16k_resample_transform.orig_freq != sr:
             self.audio_resample_transform = torchaudio.transforms.Resample(sr,self.target_sample)
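This hunk pins the torchaudio backend to soundfile and reuses a lazily cached Resample transform instead of rebuilding one per call (Resample precomputes its interpolation kernel at construction, so per-call rebuilding is the expensive part). A minimal standalone sketch of the pattern, with illustrative names not taken from the repo; unlike the committed check, which reads self.audio16k_resample_transform.orig_freq while assigning self.audio_resample_transform, the sketch uses a single attribute for both:

import torch
import torchaudio

class CachedLoader:
    # Hypothetical helper showing the lazy-cached-resampler pattern above.
    def __init__(self, target_sample: int):
        self.target_sample = target_sample
        self.resample = None  # built on first use, rebuilt only when sr changes

    def load(self, path: str) -> torch.Tensor:
        wav, sr = torchaudio.load(path)
        # Rebuild the transform only when the source sample rate changes.
        if self.resample is None or self.resample.orig_freq != sr:
            self.resample = torchaudio.transforms.Resample(sr, self.target_sample)
        return self.resample(wav)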
inference/infer_tool_webui.py
CHANGED
@@ -152,6 +152,7 @@ class Svc(object):
         self.target_sample = self.diffusion_args.data.sampling_rate
         self.hop_size = self.diffusion_args.data.block_size
         self.spk2id = self.diffusion_args.spk
+        self.dtype = torch.float32
         self.speech_encoder = self.diffusion_args.data.encoder
         self.unit_interpolate_mode = self.diffusion_args.data.unit_interpolate_mode if self.diffusion_args.data.unit_interpolate_mode is not None else 'left'
         if spk_mix_enable:
@@ -203,9 +204,10 @@ class Svc(object):

     def get_unit_f0(self, wav, tran, cluster_infer_ratio, speaker, f0_filter ,f0_predictor,cr_threshold=0.05):

-        f0, uv = f0_predictor_object.compute_f0_uv(wav)
+        if not hasattr(self,"f0_predictor_object") or self.f0_predictor_object is None or f0_predictor != self.f0_predictor_object.name:
+            self.f0_predictor_object = utils.get_f0_predictor(f0_predictor,hop_length=self.hop_size,sampling_rate=self.target_sample,device=self.dev,threshold=cr_threshold)
+        f0, uv = self.f0_predictor_object.compute_f0_uv(wav)
+
         if f0_filter and sum(f0) == 0:
             raise F0FilterException("No voice detected")
         f0 = torch.FloatTensor(f0).to(self.dev)
@@ -215,21 +217,24 @@ class Svc(object):
         f0 = f0.unsqueeze(0)
         uv = uv.unsqueeze(0)

+        wav = torch.from_numpy(wav).to(self.dev)
+        if not hasattr(self,"audio16k_resample_transform"):
+            self.audio16k_resample_transform = torchaudio.transforms.Resample(self.target_sample, 16000).to(self.dev)
+        wav16k = self.audio16k_resample_transform(wav[None,:])[0]
+
         c = self.hubert_model.encoder(wav16k)
         c = utils.repeat_expand_2d(c.squeeze(0), f0.shape[1],self.unit_interpolate_mode)

         if cluster_infer_ratio !=0:
             if self.feature_retrieval:
                 speaker_id = self.spk2id.get(speaker)
-                if speaker_id is None:
-                    raise RuntimeError("The name you entered is not in the speaker list!")
                 if not speaker_id and type(speaker) is int:
                     if len(self.spk2id.__dict__) >= speaker:
                         speaker_id = speaker
+                if speaker_id is None:
+                    raise RuntimeError("The name you entered is not in the speaker list!")
                 feature_index = self.cluster_model[speaker_id]
-                feat_np = c.transpose(0,1).cpu().numpy()
+                feat_np = np.ascontiguousarray(c.transpose(0,1).cpu().numpy())
                 if self.big_npy is None or self.now_spk_id != speaker_id:
                     self.big_npy = feature_index.reconstruct_n(0, feature_index.ntotal)
                     self.now_spk_id = speaker_id
@@ -248,7 +253,7 @@ class Svc(object):

         c = c.unsqueeze(0)
         return c, f0, uv

     def infer(self, speaker, tran, raw_path,
               cluster_infer_ratio=0,
               auto_predict_f0=False,
@@ -263,7 +268,11 @@ class Svc(object):
             second_encoding = False,
             loudness_envelope_adjustment = 1
             ):
+        torchaudio.set_audio_backend("soundfile")
+        wav, sr = torchaudio.load(raw_path)
+        if not hasattr(self,"audio_resample_transform") or self.audio16k_resample_transform.orig_freq != sr:
+            self.audio_resample_transform = torchaudio.transforms.Resample(sr,self.target_sample)
+        wav = self.audio_resample_transform(wav).numpy()[0]
         if spk_mix:
             c, f0, uv = self.get_unit_f0(wav, tran, 0, None, f0_filter,f0_predictor,cr_threshold=cr_threshold)
             n_frames = f0.size(1)
@@ -299,8 +308,9 @@ class Svc(object):
         if self.only_diffusion or self.shallow_diffusion:
             vol = self.volume_extractor.extract(audio[None,:])[None,:,None].to(self.dev) if vol is None else vol[:,:,None]
             if self.shallow_diffusion and second_encoding:
+                if not hasattr(self,"audio16k_resample_transform"):
+                    self.audio16k_resample_transform = torchaudio.transforms.Resample(self.target_sample, 16000).to(self.dev)
+                audio16k = self.audio16k_resample_transform(audio[None,:])[0]
                 c = self.hubert_model.encoder(audio16k)
                 c = utils.repeat_expand_2d(c.squeeze(0), f0.shape[1],self.unit_interpolate_mode)
             f0 = f0[:,:,None]
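In the feature-retrieval hunk above, feature_index is a per-speaker faiss index, big_npy caches all of its stored vectors via reconstruct_n(0, ntotal), and np.ascontiguousarray is needed because faiss expects C-contiguous float32 rows while transpose(0,1) yields a non-contiguous view. A hedged sketch of how such an index is typically queried and blended back into the encoder features (function and parameter names are illustrative, not the repo's API):

import faiss  # assumed available, as in the repo's requirements
import numpy as np

def retrieve_features(index, feats, big_npy, ratio=0.5, k=8):
    # faiss expects C-contiguous float32 rows.
    feats = np.ascontiguousarray(feats.astype(np.float32))
    score, ix = index.search(feats, k)            # distances/ids of k nearest units
    weight = np.square(1 / score)                 # inverse-square distance weights
    weight /= weight.sum(axis=1, keepdims=True)
    # big_npy holds every stored vector, e.g. index.reconstruct_n(0, index.ntotal).
    retrieved = np.sum(big_npy[ix] * np.expand_dims(weight, axis=2), axis=1)
    return ratio * retrieved + (1 - ratio) * feats  # blend into encoder output

Caching big_npy per speaker (as the hunk does with self.now_spk_id) avoids re-materializing the full vector set on every call.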
modules/F0Predictor/__pycache__/CrepeF0Predictor.cpython-38.pyc
CHANGED
Binary files a/modules/F0Predictor/__pycache__/CrepeF0Predictor.cpython-38.pyc and b/modules/F0Predictor/__pycache__/CrepeF0Predictor.cpython-38.pyc differ

modules/F0Predictor/__pycache__/F0Predictor.cpython-38.pyc
CHANGED
Binary files a/modules/F0Predictor/__pycache__/F0Predictor.cpython-38.pyc and b/modules/F0Predictor/__pycache__/F0Predictor.cpython-38.pyc differ

modules/F0Predictor/__pycache__/FCPEF0Predictor.cpython-38.pyc
ADDED
Binary file (3.35 kB).

modules/F0Predictor/__pycache__/HarvestF0Predictor.cpython-38.pyc
CHANGED
Binary files a/modules/F0Predictor/__pycache__/HarvestF0Predictor.cpython-38.pyc and b/modules/F0Predictor/__pycache__/HarvestF0Predictor.cpython-38.pyc differ

modules/F0Predictor/__pycache__/PMF0Predictor.cpython-38.pyc
CHANGED
Binary files a/modules/F0Predictor/__pycache__/PMF0Predictor.cpython-38.pyc and b/modules/F0Predictor/__pycache__/PMF0Predictor.cpython-38.pyc differ

modules/F0Predictor/__pycache__/RMVPEF0Predictor.cpython-38.pyc
CHANGED
Binary files a/modules/F0Predictor/__pycache__/RMVPEF0Predictor.cpython-38.pyc and b/modules/F0Predictor/__pycache__/RMVPEF0Predictor.cpython-38.pyc differ

modules/F0Predictor/__pycache__/__init__.cpython-38.pyc
CHANGED
Binary files a/modules/F0Predictor/__pycache__/__init__.cpython-38.pyc and b/modules/F0Predictor/__pycache__/__init__.cpython-38.pyc differ

modules/F0Predictor/__pycache__/crepe.cpython-38.pyc
CHANGED
Binary files a/modules/F0Predictor/__pycache__/crepe.cpython-38.pyc and b/modules/F0Predictor/__pycache__/crepe.cpython-38.pyc differ

modules/F0Predictor/fcpe/__pycache__/__init__.cpython-38.pyc
ADDED
Binary file (254 Bytes).

modules/F0Predictor/fcpe/__pycache__/model.cpython-38.pyc
ADDED
Binary file (8.08 kB).

modules/F0Predictor/fcpe/__pycache__/nvSTFT.cpython-38.pyc
ADDED
Binary file (4.4 kB).

modules/F0Predictor/fcpe/__pycache__/pcmer.cpython-38.pyc
ADDED
Binary file (11.5 kB).
modules/F0Predictor/fcpe/model.py
CHANGED
@@ -84,13 +84,17 @@ class FCPE(nn.Module):
         self.dense_out = weight_norm(
             nn.Linear(n_chans, self.n_out))

-    def forward(self, mel, infer=True, gt_f0=None, return_hz_f0=False):
+    def forward(self, mel, infer=True, gt_f0=None, return_hz_f0=False, cdecoder = "local_argmax"):
         """
         input:
             B x n_frames x n_unit
         return:
             dict of B x n_frames x feat
         """
+        if cdecoder == "argmax":
+            self.cdecoder = self.cents_decoder
+        elif cdecoder == "local_argmax":
+            self.cdecoder = self.cents_local_decoder
         if self.use_input_conv:
             x = self.stack(mel.transpose(1, 2)).transpose(1, 2)
         else:
@@ -108,7 +112,7 @@ class FCPE(nn.Module):
             loss_all = loss_all + l2_regularization(model=self, l2_alpha=self.loss_l2_regularization_scale)
             x = loss_all
         if infer:
-            x = self.cents_decoder(x)
+            x = self.cdecoder(x)
             x = self.cent_to_f0(x)
             if not return_hz_f0:
                 x = (1 + x / 700).log()
@@ -127,6 +131,25 @@ class FCPE(nn.Module):
             return rtn, confident
         else:
             return rtn
+
+    def cents_local_decoder(self, y, mask=True):
+        B, N, _ = y.size()
+        ci = self.cent_table[None, None, :].expand(B, N, -1)
+        confident, max_index = torch.max(y, dim=-1, keepdim=True)
+        local_argmax_index = torch.arange(0,8).to(max_index.device) + (max_index - 4)
+        local_argmax_index[local_argmax_index<0] = 0
+        local_argmax_index[local_argmax_index>=self.n_out] = self.n_out - 1
+        ci_l = torch.gather(ci,-1,local_argmax_index)
+        y_l = torch.gather(y,-1,local_argmax_index)
+        rtn = torch.sum(ci_l * y_l, dim=-1, keepdim=True) / torch.sum(y_l, dim=-1, keepdim=True)  # cents: [B,N,1]
+        if mask:
+            confident_mask = torch.ones_like(confident)
+            confident_mask[confident <= self.threshold] = float("-INF")
+            rtn = rtn * confident_mask
+        if self.confidence:
+            return rtn, confident
+        else:
+            return rtn

     def cent_to_f0(self, cent):
         return 10. * 2 ** (cent / 1200.)
@@ -165,7 +188,6 @@ class FCPEInfer:
             f0_min=self.args.model.f0_min,
             confidence=self.args.model.confidence,
         )
-        ckpt = torch.load(model_path, map_location=torch.device(self.device))
         model.to(self.device).to(self.dtype)
         model.load_state_dict(ckpt['model'])
         model.eval()
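cents_local_decoder replaces a global expectation over the whole cent table with a local one: take the argmax bin, then average the cents of an 8-bin window around it weighted by the activations, so multimodal activation maps no longer drag the estimate between peaks; cents convert to Hz as f0 = 10 * 2**(cents / 1200) in cent_to_f0. A self-contained sketch of that decoding step (names are illustrative):

import torch

def local_argmax_cents(y, cent_table, window=8):
    # y: [B, N, n_bins] activations; cent_table: [n_bins] bin centers in cents.
    B, N, n_bins = y.size()
    ci = cent_table[None, None, :].expand(B, N, -1)
    _, max_index = torch.max(y, dim=-1, keepdim=True)
    # Window of indices centered on the argmax, clamped to the valid bin range.
    idx = (torch.arange(window, device=y.device) + (max_index - window // 2)).clamp(0, n_bins - 1)
    ci_l = torch.gather(ci, -1, idx)
    y_l = torch.gather(y, -1, idx)
    # Activation-weighted mean of the local cents -> [B, N, 1]
    return (ci_l * y_l).sum(-1, keepdim=True) / y_l.sum(-1, keepdim=True)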
modules/F0Predictor/rmvpe/__pycache__/__init__.cpython-38.pyc
CHANGED
Binary files a/modules/F0Predictor/rmvpe/__pycache__/__init__.cpython-38.pyc and b/modules/F0Predictor/rmvpe/__pycache__/__init__.cpython-38.pyc differ

modules/F0Predictor/rmvpe/__pycache__/constants.cpython-38.pyc
CHANGED
Binary files a/modules/F0Predictor/rmvpe/__pycache__/constants.cpython-38.pyc and b/modules/F0Predictor/rmvpe/__pycache__/constants.cpython-38.pyc differ

modules/F0Predictor/rmvpe/__pycache__/deepunet.cpython-38.pyc
CHANGED
Binary files a/modules/F0Predictor/rmvpe/__pycache__/deepunet.cpython-38.pyc and b/modules/F0Predictor/rmvpe/__pycache__/deepunet.cpython-38.pyc differ

modules/F0Predictor/rmvpe/__pycache__/inference.cpython-38.pyc
CHANGED
Binary files a/modules/F0Predictor/rmvpe/__pycache__/inference.cpython-38.pyc and b/modules/F0Predictor/rmvpe/__pycache__/inference.cpython-38.pyc differ

modules/F0Predictor/rmvpe/__pycache__/model.cpython-38.pyc
CHANGED
Binary files a/modules/F0Predictor/rmvpe/__pycache__/model.cpython-38.pyc and b/modules/F0Predictor/rmvpe/__pycache__/model.cpython-38.pyc differ

modules/F0Predictor/rmvpe/__pycache__/seq.cpython-38.pyc
CHANGED
Binary files a/modules/F0Predictor/rmvpe/__pycache__/seq.cpython-38.pyc and b/modules/F0Predictor/rmvpe/__pycache__/seq.cpython-38.pyc differ

modules/F0Predictor/rmvpe/__pycache__/spec.cpython-38.pyc
CHANGED
Binary files a/modules/F0Predictor/rmvpe/__pycache__/spec.cpython-38.pyc and b/modules/F0Predictor/rmvpe/__pycache__/spec.cpython-38.pyc differ

modules/F0Predictor/rmvpe/__pycache__/utils.cpython-38.pyc
CHANGED
Binary files a/modules/F0Predictor/rmvpe/__pycache__/utils.cpython-38.pyc and b/modules/F0Predictor/rmvpe/__pycache__/utils.cpython-38.pyc differ

modules/__pycache__/DSConv.cpython-38.pyc
CHANGED
Binary files a/modules/__pycache__/DSConv.cpython-38.pyc and b/modules/__pycache__/DSConv.cpython-38.pyc differ

modules/__pycache__/__init__.cpython-38.pyc
CHANGED
Binary files a/modules/__pycache__/__init__.cpython-38.pyc and b/modules/__pycache__/__init__.cpython-38.pyc differ

modules/__pycache__/attentions.cpython-38.pyc
CHANGED
Binary files a/modules/__pycache__/attentions.cpython-38.pyc and b/modules/__pycache__/attentions.cpython-38.pyc differ

modules/__pycache__/commons.cpython-38.pyc
CHANGED
Binary files a/modules/__pycache__/commons.cpython-38.pyc and b/modules/__pycache__/commons.cpython-38.pyc differ

modules/__pycache__/enhancer.cpython-38.pyc
CHANGED
Binary files a/modules/__pycache__/enhancer.cpython-38.pyc and b/modules/__pycache__/enhancer.cpython-38.pyc differ

modules/__pycache__/losses.cpython-38.pyc
CHANGED
Binary files a/modules/__pycache__/losses.cpython-38.pyc and b/modules/__pycache__/losses.cpython-38.pyc differ

modules/__pycache__/mel_processing.cpython-38.pyc
CHANGED
Binary files a/modules/__pycache__/mel_processing.cpython-38.pyc and b/modules/__pycache__/mel_processing.cpython-38.pyc differ

modules/__pycache__/modules.cpython-38.pyc
CHANGED
Binary files a/modules/__pycache__/modules.cpython-38.pyc and b/modules/__pycache__/modules.cpython-38.pyc differ
pretrain/meta.py
CHANGED
@@ -12,9 +12,17 @@ def download_dict():
             "url": "https://github.com/bshall/hubert/releases/download/v0.1/hubert-soft-0d54a1f4.pt",
             "output": "./pretrain/hubert-soft-0d54a1f4.pt"
         },
+        "whisper-ppg-small": {
+            "url": "https://openaipublic.azureedge.net/main/whisper/models/9ecf779972d90ba49c06d968637d720dd632c55bbf19d441fb42bf17a411e794/small.pt",
+            "output": "./pretrain/small.pt"
+        },
         "whisper-ppg": {
             "url": "https://openaipublic.azureedge.net/main/whisper/models/345ae4da62f9b3d59415adc60127b97c714f32e89e936602e85993674d08dcb1/medium.pt",
             "output": "./pretrain/medium.pt"
+        },
+        "whisper-ppg-large": {
+            "url": "https://openaipublic.azureedge.net/main/whisper/models/81f7c96c852ee8fc832187b0132e569d6c3065a3252ed18e56effd0b6a73e524/large-v2.pt",
+            "output": "./pretrain/large-v2.pt"
         }
     }
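The dict maps encoder names to checkpoint URLs and local output paths; this change adds Whisper small and large-v2 entries alongside the existing medium checkpoint. A hypothetical consumer (not the repo's actual downloader) might use an entry like this:

import os
import urllib.request

def fetch(entry):
    # Download a checkpoint to its target path if it is not already present.
    os.makedirs(os.path.dirname(entry["output"]), exist_ok=True)
    if not os.path.exists(entry["output"]):
        urllib.request.urlretrieve(entry["url"], entry["output"])

# e.g. fetch(download_dict()["whisper-ppg-small"])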
train_diff.py
CHANGED
@@ -1,6 +1,7 @@
 import argparse

 import torch
+from loguru import logger
 from torch.optim import lr_scheduler

 from diffusion.data_loaders import get_data_loaders
@@ -28,8 +29,8 @@ if __name__ == '__main__':
     # load config
     args = utils.load_config(cmd.config)
+    logger.info(' > config:'+ cmd.config)
+    logger.info(' > exp:'+ args.env.expdir)

     # load vocoder
     vocoder = Vocoder(args.vocoder.type, args.vocoder.ckpt, device=args.device)
@@ -47,7 +48,7 @@ if __name__ == '__main__':
         args.model.k_step_max
     )

+    logger.info(f' > Now model timesteps is {model.timesteps}, and k_step_max is {model.k_step_max}')

     # load parameters
     optimizer = torch.optim.AdamW(model.parameters())
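The added lines use loguru, which needs no handler setup: importing logger gives a timestamped stderr sink out of the box. A minimal standalone equivalent of the added calls (paths are placeholders for cmd.config and args.env.expdir):

from loguru import logger

config_path = "configs/diffusion.yaml"  # stand-in for cmd.config
expdir = "exp/diffusion-run"            # stand-in for args.env.expdir

logger.info(' > config:' + config_path)
logger.info(' > exp:' + expdir)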