darksakura committed on
Commit
5b74e25
·
1 Parent(s): 46bab9a

Upload 85 files

Browse files
Files changed (36) hide show
  1. inference/__pycache__/__init__.cpython-38.pyc +0 -0
  2. inference/__pycache__/infer_tool_webui.cpython-38.pyc +0 -0
  3. inference/__pycache__/slicer.cpython-38.pyc +0 -0
  4. inference/infer_tool.py +1 -0
  5. inference/infer_tool_webui.py +22 -12
  6. modules/F0Predictor/__pycache__/CrepeF0Predictor.cpython-38.pyc +0 -0
  7. modules/F0Predictor/__pycache__/F0Predictor.cpython-38.pyc +0 -0
  8. modules/F0Predictor/__pycache__/FCPEF0Predictor.cpython-38.pyc +0 -0
  9. modules/F0Predictor/__pycache__/HarvestF0Predictor.cpython-38.pyc +0 -0
  10. modules/F0Predictor/__pycache__/PMF0Predictor.cpython-38.pyc +0 -0
  11. modules/F0Predictor/__pycache__/RMVPEF0Predictor.cpython-38.pyc +0 -0
  12. modules/F0Predictor/__pycache__/__init__.cpython-38.pyc +0 -0
  13. modules/F0Predictor/__pycache__/crepe.cpython-38.pyc +0 -0
  14. modules/F0Predictor/fcpe/__pycache__/__init__.cpython-38.pyc +0 -0
  15. modules/F0Predictor/fcpe/__pycache__/model.cpython-38.pyc +0 -0
  16. modules/F0Predictor/fcpe/__pycache__/nvSTFT.cpython-38.pyc +0 -0
  17. modules/F0Predictor/fcpe/__pycache__/pcmer.cpython-38.pyc +0 -0
  18. modules/F0Predictor/fcpe/model.py +25 -3
  19. modules/F0Predictor/rmvpe/__pycache__/__init__.cpython-38.pyc +0 -0
  20. modules/F0Predictor/rmvpe/__pycache__/constants.cpython-38.pyc +0 -0
  21. modules/F0Predictor/rmvpe/__pycache__/deepunet.cpython-38.pyc +0 -0
  22. modules/F0Predictor/rmvpe/__pycache__/inference.cpython-38.pyc +0 -0
  23. modules/F0Predictor/rmvpe/__pycache__/model.cpython-38.pyc +0 -0
  24. modules/F0Predictor/rmvpe/__pycache__/seq.cpython-38.pyc +0 -0
  25. modules/F0Predictor/rmvpe/__pycache__/spec.cpython-38.pyc +0 -0
  26. modules/F0Predictor/rmvpe/__pycache__/utils.cpython-38.pyc +0 -0
  27. modules/__pycache__/DSConv.cpython-38.pyc +0 -0
  28. modules/__pycache__/__init__.cpython-38.pyc +0 -0
  29. modules/__pycache__/attentions.cpython-38.pyc +0 -0
  30. modules/__pycache__/commons.cpython-38.pyc +0 -0
  31. modules/__pycache__/enhancer.cpython-38.pyc +0 -0
  32. modules/__pycache__/losses.cpython-38.pyc +0 -0
  33. modules/__pycache__/mel_processing.cpython-38.pyc +0 -0
  34. modules/__pycache__/modules.cpython-38.pyc +0 -0
  35. pretrain/meta.py +8 -0
  36. train_diff.py +4 -3
inference/__pycache__/__init__.cpython-38.pyc CHANGED
Binary files a/inference/__pycache__/__init__.cpython-38.pyc and b/inference/__pycache__/__init__.cpython-38.pyc differ
 
inference/__pycache__/infer_tool_webui.cpython-38.pyc CHANGED
Binary files a/inference/__pycache__/infer_tool_webui.cpython-38.pyc and b/inference/__pycache__/infer_tool_webui.cpython-38.pyc differ
 
inference/__pycache__/slicer.cpython-38.pyc CHANGED
Binary files a/inference/__pycache__/slicer.cpython-38.pyc and b/inference/__pycache__/slicer.cpython-38.pyc differ
 
inference/infer_tool.py CHANGED
@@ -267,6 +267,7 @@ class Svc(object):
267
  second_encoding = False,
268
  loudness_envelope_adjustment = 1
269
  ):
 
270
  wav, sr = torchaudio.load(raw_path)
271
  if not hasattr(self,"audio_resample_transform") or self.audio16k_resample_transform.orig_freq != sr:
272
  self.audio_resample_transform = torchaudio.transforms.Resample(sr,self.target_sample)
 
267
  second_encoding = False,
268
  loudness_envelope_adjustment = 1
269
  ):
270
+ torchaudio.set_audio_backend("soundfile")
271
  wav, sr = torchaudio.load(raw_path)
272
  if not hasattr(self,"audio_resample_transform") or self.audio16k_resample_transform.orig_freq != sr:
273
  self.audio_resample_transform = torchaudio.transforms.Resample(sr,self.target_sample)
inference/infer_tool_webui.py CHANGED
@@ -152,6 +152,7 @@ class Svc(object):
152
  self.target_sample = self.diffusion_args.data.sampling_rate
153
  self.hop_size = self.diffusion_args.data.block_size
154
  self.spk2id = self.diffusion_args.spk
 
155
  self.speech_encoder = self.diffusion_args.data.encoder
156
  self.unit_interpolate_mode = self.diffusion_args.data.unit_interpolate_mode if self.diffusion_args.data.unit_interpolate_mode is not None else 'left'
157
  if spk_mix_enable:
@@ -203,9 +204,10 @@ class Svc(object):
203
 
204
  def get_unit_f0(self, wav, tran, cluster_infer_ratio, speaker, f0_filter ,f0_predictor,cr_threshold=0.05):
205
 
206
- f0_predictor_object = utils.get_f0_predictor(f0_predictor,hop_length=self.hop_size,sampling_rate=self.target_sample,device=self.dev,threshold=cr_threshold)
207
-
208
- f0, uv = f0_predictor_object.compute_f0_uv(wav)
 
209
  if f0_filter and sum(f0) == 0:
210
  raise F0FilterException("No voice detected")
211
  f0 = torch.FloatTensor(f0).to(self.dev)
@@ -215,21 +217,24 @@ class Svc(object):
215
  f0 = f0.unsqueeze(0)
216
  uv = uv.unsqueeze(0)
217
 
218
- wav16k = librosa.resample(wav, orig_sr=self.target_sample, target_sr=16000)
219
- wav16k = torch.from_numpy(wav16k).to(self.dev)
 
 
 
220
  c = self.hubert_model.encoder(wav16k)
221
  c = utils.repeat_expand_2d(c.squeeze(0), f0.shape[1],self.unit_interpolate_mode)
222
 
223
  if cluster_infer_ratio !=0:
224
  if self.feature_retrieval:
225
  speaker_id = self.spk2id.get(speaker)
226
- if speaker_id is None:
227
- raise RuntimeError("The name you entered is not in the speaker list!")
228
  if not speaker_id and type(speaker) is int:
229
  if len(self.spk2id.__dict__) >= speaker:
230
  speaker_id = speaker
 
 
231
  feature_index = self.cluster_model[speaker_id]
232
- feat_np = c.transpose(0,1).cpu().numpy()
233
  if self.big_npy is None or self.now_spk_id != speaker_id:
234
  self.big_npy = feature_index.reconstruct_n(0, feature_index.ntotal)
235
  self.now_spk_id = speaker_id
@@ -248,7 +253,7 @@ class Svc(object):
248
 
249
  c = c.unsqueeze(0)
250
  return c, f0, uv
251
-
252
  def infer(self, speaker, tran, raw_path,
253
  cluster_infer_ratio=0,
254
  auto_predict_f0=False,
@@ -263,7 +268,11 @@ class Svc(object):
263
  second_encoding = False,
264
  loudness_envelope_adjustment = 1
265
  ):
266
- wav, sr = librosa.load(raw_path, sr=self.target_sample)
 
 
 
 
267
  if spk_mix:
268
  c, f0, uv = self.get_unit_f0(wav, tran, 0, None, f0_filter,f0_predictor,cr_threshold=cr_threshold)
269
  n_frames = f0.size(1)
@@ -299,8 +308,9 @@ class Svc(object):
299
  if self.only_diffusion or self.shallow_diffusion:
300
  vol = self.volume_extractor.extract(audio[None,:])[None,:,None].to(self.dev) if vol is None else vol[:,:,None]
301
  if self.shallow_diffusion and second_encoding:
302
- audio16k = librosa.resample(audio.detach().cpu().numpy(), orig_sr=self.target_sample, target_sr=16000)
303
- audio16k = torch.from_numpy(audio16k).to(self.dev)
 
304
  c = self.hubert_model.encoder(audio16k)
305
  c = utils.repeat_expand_2d(c.squeeze(0), f0.shape[1],self.unit_interpolate_mode)
306
  f0 = f0[:,:,None]
 
152
  self.target_sample = self.diffusion_args.data.sampling_rate
153
  self.hop_size = self.diffusion_args.data.block_size
154
  self.spk2id = self.diffusion_args.spk
155
+ self.dtype = torch.float32
156
  self.speech_encoder = self.diffusion_args.data.encoder
157
  self.unit_interpolate_mode = self.diffusion_args.data.unit_interpolate_mode if self.diffusion_args.data.unit_interpolate_mode is not None else 'left'
158
  if spk_mix_enable:
 
204
 
205
  def get_unit_f0(self, wav, tran, cluster_infer_ratio, speaker, f0_filter ,f0_predictor,cr_threshold=0.05):
206
 
207
+ if not hasattr(self,"f0_predictor_object") or self.f0_predictor_object is None or f0_predictor != self.f0_predictor_object.name:
208
+ self.f0_predictor_object = utils.get_f0_predictor(f0_predictor,hop_length=self.hop_size,sampling_rate=self.target_sample,device=self.dev,threshold=cr_threshold)
209
+ f0, uv = self.f0_predictor_object.compute_f0_uv(wav)
210
+
211
  if f0_filter and sum(f0) == 0:
212
  raise F0FilterException("No voice detected")
213
  f0 = torch.FloatTensor(f0).to(self.dev)
 
217
  f0 = f0.unsqueeze(0)
218
  uv = uv.unsqueeze(0)
219
 
220
+ wav = torch.from_numpy(wav).to(self.dev)
221
+ if not hasattr(self,"audio16k_resample_transform"):
222
+ self.audio16k_resample_transform = torchaudio.transforms.Resample(self.target_sample, 16000).to(self.dev)
223
+ wav16k = self.audio16k_resample_transform(wav[None,:])[0]
224
+
225
  c = self.hubert_model.encoder(wav16k)
226
  c = utils.repeat_expand_2d(c.squeeze(0), f0.shape[1],self.unit_interpolate_mode)
227
 
228
  if cluster_infer_ratio !=0:
229
  if self.feature_retrieval:
230
  speaker_id = self.spk2id.get(speaker)
 
 
231
  if not speaker_id and type(speaker) is int:
232
  if len(self.spk2id.__dict__) >= speaker:
233
  speaker_id = speaker
234
+ if speaker_id is None:
235
+ raise RuntimeError("The name you entered is not in the speaker list!")
236
  feature_index = self.cluster_model[speaker_id]
237
+ feat_np = np.ascontiguousarray(c.transpose(0,1).cpu().numpy())
238
  if self.big_npy is None or self.now_spk_id != speaker_id:
239
  self.big_npy = feature_index.reconstruct_n(0, feature_index.ntotal)
240
  self.now_spk_id = speaker_id
 
253
 
254
  c = c.unsqueeze(0)
255
  return c, f0, uv
256
+
257
  def infer(self, speaker, tran, raw_path,
258
  cluster_infer_ratio=0,
259
  auto_predict_f0=False,
 
268
  second_encoding = False,
269
  loudness_envelope_adjustment = 1
270
  ):
271
+ torchaudio.set_audio_backend("soundfile")
272
+ wav, sr = torchaudio.load(raw_path)
273
+ if not hasattr(self,"audio_resample_transform") or self.audio16k_resample_transform.orig_freq != sr:
274
+ self.audio_resample_transform = torchaudio.transforms.Resample(sr,self.target_sample)
275
+ wav = self.audio_resample_transform(wav).numpy()[0]
276
  if spk_mix:
277
  c, f0, uv = self.get_unit_f0(wav, tran, 0, None, f0_filter,f0_predictor,cr_threshold=cr_threshold)
278
  n_frames = f0.size(1)
 
308
  if self.only_diffusion or self.shallow_diffusion:
309
  vol = self.volume_extractor.extract(audio[None,:])[None,:,None].to(self.dev) if vol is None else vol[:,:,None]
310
  if self.shallow_diffusion and second_encoding:
311
+ if not hasattr(self,"audio16k_resample_transform"):
312
+ self.audio16k_resample_transform = torchaudio.transforms.Resample(self.target_sample, 16000).to(self.dev)
313
+ audio16k = self.audio16k_resample_transform(audio[None,:])[0]
314
  c = self.hubert_model.encoder(audio16k)
315
  c = utils.repeat_expand_2d(c.squeeze(0), f0.shape[1],self.unit_interpolate_mode)
316
  f0 = f0[:,:,None]
modules/F0Predictor/__pycache__/CrepeF0Predictor.cpython-38.pyc CHANGED
Binary files a/modules/F0Predictor/__pycache__/CrepeF0Predictor.cpython-38.pyc and b/modules/F0Predictor/__pycache__/CrepeF0Predictor.cpython-38.pyc differ
 
modules/F0Predictor/__pycache__/F0Predictor.cpython-38.pyc CHANGED
Binary files a/modules/F0Predictor/__pycache__/F0Predictor.cpython-38.pyc and b/modules/F0Predictor/__pycache__/F0Predictor.cpython-38.pyc differ
 
modules/F0Predictor/__pycache__/FCPEF0Predictor.cpython-38.pyc ADDED
Binary file (3.35 kB). View file
 
modules/F0Predictor/__pycache__/HarvestF0Predictor.cpython-38.pyc CHANGED
Binary files a/modules/F0Predictor/__pycache__/HarvestF0Predictor.cpython-38.pyc and b/modules/F0Predictor/__pycache__/HarvestF0Predictor.cpython-38.pyc differ
 
modules/F0Predictor/__pycache__/PMF0Predictor.cpython-38.pyc CHANGED
Binary files a/modules/F0Predictor/__pycache__/PMF0Predictor.cpython-38.pyc and b/modules/F0Predictor/__pycache__/PMF0Predictor.cpython-38.pyc differ
 
modules/F0Predictor/__pycache__/RMVPEF0Predictor.cpython-38.pyc CHANGED
Binary files a/modules/F0Predictor/__pycache__/RMVPEF0Predictor.cpython-38.pyc and b/modules/F0Predictor/__pycache__/RMVPEF0Predictor.cpython-38.pyc differ
 
modules/F0Predictor/__pycache__/__init__.cpython-38.pyc CHANGED
Binary files a/modules/F0Predictor/__pycache__/__init__.cpython-38.pyc and b/modules/F0Predictor/__pycache__/__init__.cpython-38.pyc differ
 
modules/F0Predictor/__pycache__/crepe.cpython-38.pyc CHANGED
Binary files a/modules/F0Predictor/__pycache__/crepe.cpython-38.pyc and b/modules/F0Predictor/__pycache__/crepe.cpython-38.pyc differ
 
modules/F0Predictor/fcpe/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (254 Bytes). View file
 
modules/F0Predictor/fcpe/__pycache__/model.cpython-38.pyc ADDED
Binary file (8.08 kB). View file
 
modules/F0Predictor/fcpe/__pycache__/nvSTFT.cpython-38.pyc ADDED
Binary file (4.4 kB). View file
 
modules/F0Predictor/fcpe/__pycache__/pcmer.cpython-38.pyc ADDED
Binary file (11.5 kB). View file
 
modules/F0Predictor/fcpe/model.py CHANGED
@@ -84,13 +84,17 @@ class FCPE(nn.Module):
84
  self.dense_out = weight_norm(
85
  nn.Linear(n_chans, self.n_out))
86
 
87
- def forward(self, mel, infer=True, gt_f0=None, return_hz_f0=False):
88
  """
89
  input:
90
  B x n_frames x n_unit
91
  return:
92
  dict of B x n_frames x feat
93
  """
 
 
 
 
94
  if self.use_input_conv:
95
  x = self.stack(mel.transpose(1, 2)).transpose(1, 2)
96
  else:
@@ -108,7 +112,7 @@ class FCPE(nn.Module):
108
  loss_all = loss_all + l2_regularization(model=self, l2_alpha=self.loss_l2_regularization_scale)
109
  x = loss_all
110
  if infer:
111
- x = self.cents_decoder(x)
112
  x = self.cent_to_f0(x)
113
  if not return_hz_f0:
114
  x = (1 + x / 700).log()
@@ -127,6 +131,25 @@ class FCPE(nn.Module):
127
  return rtn, confident
128
  else:
129
  return rtn
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
 
131
  def cent_to_f0(self, cent):
132
  return 10. * 2 ** (cent / 1200.)
@@ -165,7 +188,6 @@ class FCPEInfer:
165
  f0_min=self.args.model.f0_min,
166
  confidence=self.args.model.confidence,
167
  )
168
- ckpt = torch.load(model_path, map_location=torch.device(self.device))
169
  model.to(self.device).to(self.dtype)
170
  model.load_state_dict(ckpt['model'])
171
  model.eval()
 
84
  self.dense_out = weight_norm(
85
  nn.Linear(n_chans, self.n_out))
86
 
87
+ def forward(self, mel, infer=True, gt_f0=None, return_hz_f0=False, cdecoder = "local_argmax"):
88
  """
89
  input:
90
  B x n_frames x n_unit
91
  return:
92
  dict of B x n_frames x feat
93
  """
94
+ if cdecoder == "argmax":
95
+ self.cdecoder = self.cents_decoder
96
+ elif cdecoder == "local_argmax":
97
+ self.cdecoder = self.cents_local_decoder
98
  if self.use_input_conv:
99
  x = self.stack(mel.transpose(1, 2)).transpose(1, 2)
100
  else:
 
112
  loss_all = loss_all + l2_regularization(model=self, l2_alpha=self.loss_l2_regularization_scale)
113
  x = loss_all
114
  if infer:
115
+ x = self.cdecoder(x)
116
  x = self.cent_to_f0(x)
117
  if not return_hz_f0:
118
  x = (1 + x / 700).log()
 
131
  return rtn, confident
132
  else:
133
  return rtn
134
+
135
+ def cents_local_decoder(self, y, mask=True):
136
+ B, N, _ = y.size()
137
+ ci = self.cent_table[None, None, :].expand(B, N, -1)
138
+ confident, max_index = torch.max(y, dim=-1, keepdim=True)
139
+ local_argmax_index = torch.arange(0,8).to(max_index.device) + (max_index - 4)
140
+ local_argmax_index[local_argmax_index<0] = 0
141
+ local_argmax_index[local_argmax_index>=self.n_out] = self.n_out - 1
142
+ ci_l = torch.gather(ci,-1,local_argmax_index)
143
+ y_l = torch.gather(y,-1,local_argmax_index)
144
+ rtn = torch.sum(ci_l * y_l, dim=-1, keepdim=True) / torch.sum(y_l, dim=-1, keepdim=True) # cents: [B,N,1]
145
+ if mask:
146
+ confident_mask = torch.ones_like(confident)
147
+ confident_mask[confident <= self.threshold] = float("-INF")
148
+ rtn = rtn * confident_mask
149
+ if self.confidence:
150
+ return rtn, confident
151
+ else:
152
+ return rtn
153
 
154
  def cent_to_f0(self, cent):
155
  return 10. * 2 ** (cent / 1200.)
 
188
  f0_min=self.args.model.f0_min,
189
  confidence=self.args.model.confidence,
190
  )
 
191
  model.to(self.device).to(self.dtype)
192
  model.load_state_dict(ckpt['model'])
193
  model.eval()
modules/F0Predictor/rmvpe/__pycache__/__init__.cpython-38.pyc CHANGED
Binary files a/modules/F0Predictor/rmvpe/__pycache__/__init__.cpython-38.pyc and b/modules/F0Predictor/rmvpe/__pycache__/__init__.cpython-38.pyc differ
 
modules/F0Predictor/rmvpe/__pycache__/constants.cpython-38.pyc CHANGED
Binary files a/modules/F0Predictor/rmvpe/__pycache__/constants.cpython-38.pyc and b/modules/F0Predictor/rmvpe/__pycache__/constants.cpython-38.pyc differ
 
modules/F0Predictor/rmvpe/__pycache__/deepunet.cpython-38.pyc CHANGED
Binary files a/modules/F0Predictor/rmvpe/__pycache__/deepunet.cpython-38.pyc and b/modules/F0Predictor/rmvpe/__pycache__/deepunet.cpython-38.pyc differ
 
modules/F0Predictor/rmvpe/__pycache__/inference.cpython-38.pyc CHANGED
Binary files a/modules/F0Predictor/rmvpe/__pycache__/inference.cpython-38.pyc and b/modules/F0Predictor/rmvpe/__pycache__/inference.cpython-38.pyc differ
 
modules/F0Predictor/rmvpe/__pycache__/model.cpython-38.pyc CHANGED
Binary files a/modules/F0Predictor/rmvpe/__pycache__/model.cpython-38.pyc and b/modules/F0Predictor/rmvpe/__pycache__/model.cpython-38.pyc differ
 
modules/F0Predictor/rmvpe/__pycache__/seq.cpython-38.pyc CHANGED
Binary files a/modules/F0Predictor/rmvpe/__pycache__/seq.cpython-38.pyc and b/modules/F0Predictor/rmvpe/__pycache__/seq.cpython-38.pyc differ
 
modules/F0Predictor/rmvpe/__pycache__/spec.cpython-38.pyc CHANGED
Binary files a/modules/F0Predictor/rmvpe/__pycache__/spec.cpython-38.pyc and b/modules/F0Predictor/rmvpe/__pycache__/spec.cpython-38.pyc differ
 
modules/F0Predictor/rmvpe/__pycache__/utils.cpython-38.pyc CHANGED
Binary files a/modules/F0Predictor/rmvpe/__pycache__/utils.cpython-38.pyc and b/modules/F0Predictor/rmvpe/__pycache__/utils.cpython-38.pyc differ
 
modules/__pycache__/DSConv.cpython-38.pyc CHANGED
Binary files a/modules/__pycache__/DSConv.cpython-38.pyc and b/modules/__pycache__/DSConv.cpython-38.pyc differ
 
modules/__pycache__/__init__.cpython-38.pyc CHANGED
Binary files a/modules/__pycache__/__init__.cpython-38.pyc and b/modules/__pycache__/__init__.cpython-38.pyc differ
 
modules/__pycache__/attentions.cpython-38.pyc CHANGED
Binary files a/modules/__pycache__/attentions.cpython-38.pyc and b/modules/__pycache__/attentions.cpython-38.pyc differ
 
modules/__pycache__/commons.cpython-38.pyc CHANGED
Binary files a/modules/__pycache__/commons.cpython-38.pyc and b/modules/__pycache__/commons.cpython-38.pyc differ
 
modules/__pycache__/enhancer.cpython-38.pyc CHANGED
Binary files a/modules/__pycache__/enhancer.cpython-38.pyc and b/modules/__pycache__/enhancer.cpython-38.pyc differ
 
modules/__pycache__/losses.cpython-38.pyc CHANGED
Binary files a/modules/__pycache__/losses.cpython-38.pyc and b/modules/__pycache__/losses.cpython-38.pyc differ
 
modules/__pycache__/mel_processing.cpython-38.pyc CHANGED
Binary files a/modules/__pycache__/mel_processing.cpython-38.pyc and b/modules/__pycache__/mel_processing.cpython-38.pyc differ
 
modules/__pycache__/modules.cpython-38.pyc CHANGED
Binary files a/modules/__pycache__/modules.cpython-38.pyc and b/modules/__pycache__/modules.cpython-38.pyc differ
 
pretrain/meta.py CHANGED
@@ -12,9 +12,17 @@ def download_dict():
12
  "url": "https://github.com/bshall/hubert/releases/download/v0.1/hubert-soft-0d54a1f4.pt",
13
  "output": "./pretrain/hubert-soft-0d54a1f4.pt"
14
  },
 
 
 
 
15
  "whisper-ppg": {
16
  "url": "https://openaipublic.azureedge.net/main/whisper/models/345ae4da62f9b3d59415adc60127b97c714f32e89e936602e85993674d08dcb1/medium.pt",
17
  "output": "./pretrain/medium.pt"
 
 
 
 
18
  }
19
  }
20
 
 
12
  "url": "https://github.com/bshall/hubert/releases/download/v0.1/hubert-soft-0d54a1f4.pt",
13
  "output": "./pretrain/hubert-soft-0d54a1f4.pt"
14
  },
15
+ "whisper-ppg-small": {
16
+ "url": "https://openaipublic.azureedge.net/main/whisper/models/9ecf779972d90ba49c06d968637d720dd632c55bbf19d441fb42bf17a411e794/small.pt",
17
+ "output": "./pretrain/small.pt"
18
+ },
19
  "whisper-ppg": {
20
  "url": "https://openaipublic.azureedge.net/main/whisper/models/345ae4da62f9b3d59415adc60127b97c714f32e89e936602e85993674d08dcb1/medium.pt",
21
  "output": "./pretrain/medium.pt"
22
+ },
23
+ "whisper-ppg-large": {
24
+ "url": "https://openaipublic.azureedge.net/main/whisper/models/81f7c96c852ee8fc832187b0132e569d6c3065a3252ed18e56effd0b6a73e524/large-v2.pt",
25
+ "output": "./pretrain/large-v2.pt"
26
  }
27
  }
28
 
train_diff.py CHANGED
@@ -1,6 +1,7 @@
1
  import argparse
2
 
3
  import torch
 
4
  from torch.optim import lr_scheduler
5
 
6
  from diffusion.data_loaders import get_data_loaders
@@ -28,8 +29,8 @@ if __name__ == '__main__':
28
 
29
  # load config
30
  args = utils.load_config(cmd.config)
31
- print(' > config:', cmd.config)
32
- print(' > exp:', args.env.expdir)
33
 
34
  # load vocoder
35
  vocoder = Vocoder(args.vocoder.type, args.vocoder.ckpt, device=args.device)
@@ -47,7 +48,7 @@ if __name__ == '__main__':
47
  args.model.k_step_max
48
  )
49
 
50
- print(f' > INFO: now model timesteps is {model.timesteps}, and k_step_max is {model.k_step_max}')
51
 
52
  # load parameters
53
  optimizer = torch.optim.AdamW(model.parameters())
 
1
  import argparse
2
 
3
  import torch
4
+ from loguru import logger
5
  from torch.optim import lr_scheduler
6
 
7
  from diffusion.data_loaders import get_data_loaders
 
29
 
30
  # load config
31
  args = utils.load_config(cmd.config)
32
+ logger.info(' > config:'+ cmd.config)
33
+ logger.info(' > exp:'+ args.env.expdir)
34
 
35
  # load vocoder
36
  vocoder = Vocoder(args.vocoder.type, args.vocoder.ckpt, device=args.device)
 
48
  args.model.k_step_max
49
  )
50
 
51
+ logger.info(f' > Now model timesteps is {model.timesteps}, and k_step_max is {model.k_step_max}')
52
 
53
  # load parameters
54
  optimizer = torch.optim.AdamW(model.parameters())