MarcusSu1216 committed on
Commit
e7d77e7
1 Parent(s): 5ec3a59

Update preprocess_hubert_f0.py

Browse files
Files changed (1) hide show
  1. preprocess_hubert_f0.py +9 -48
preprocess_hubert_f0.py CHANGED
@@ -7,12 +7,10 @@ from random import shuffle
7
  import torch
8
  from glob import glob
9
  from tqdm import tqdm
10
- from modules.mel_processing import spectrogram_torch
11
 
12
  import utils
13
  import logging
14
-
15
- logging.getLogger("numba").setLevel(logging.WARNING)
16
  import librosa
17
  import numpy as np
18
 
@@ -26,47 +24,16 @@ def process_one(filename, hmodel):
26
  wav, sr = librosa.load(filename, sr=sampling_rate)
27
  soft_path = filename + ".soft.pt"
28
  if not os.path.exists(soft_path):
29
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
30
  wav16k = librosa.resample(wav, orig_sr=sampling_rate, target_sr=16000)
31
- wav16k = torch.from_numpy(wav16k).to(device)
32
  c = utils.get_hubert_content(hmodel, wav_16k_tensor=wav16k)
33
  torch.save(c.cpu(), soft_path)
34
-
35
  f0_path = filename + ".f0.npy"
36
  if not os.path.exists(f0_path):
37
- f0 = utils.compute_f0_dio(
38
- wav, sampling_rate=sampling_rate, hop_length=hop_length
39
- )
40
  np.save(f0_path, f0)
41
 
42
- spec_path = filename.replace(".wav", ".spec.pt")
43
- if not os.path.exists(spec_path):
44
- # Process spectrogram
45
- # The following code can't be replaced by torch.FloatTensor(wav)
46
- # because load_wav_to_torch return a tensor that need to be normalized
47
-
48
- audio, sr = utils.load_wav_to_torch(filename)
49
- if sr != hps.data.sampling_rate:
50
- raise ValueError(
51
- "{} SR doesn't match target {} SR".format(
52
- sr, hps.data.sampling_rate
53
- )
54
- )
55
-
56
- audio_norm = audio / hps.data.max_wav_value
57
- audio_norm = audio_norm.unsqueeze(0)
58
-
59
- spec = spectrogram_torch(
60
- audio_norm,
61
- hps.data.filter_length,
62
- hps.data.sampling_rate,
63
- hps.data.hop_length,
64
- hps.data.win_length,
65
- center=False,
66
- )
67
- spec = torch.squeeze(spec, 0)
68
- torch.save(spec, spec_path)
69
-
70
 
71
  def process_batch(filenames):
72
  print("Loading hubert for content...")
@@ -79,23 +46,17 @@ def process_batch(filenames):
79
 
80
  if __name__ == "__main__":
81
  parser = argparse.ArgumentParser()
82
- parser.add_argument(
83
- "--in_dir", type=str, default="dataset/44k", help="path to input dir"
84
- )
85
 
86
  args = parser.parse_args()
87
- filenames = glob(f"{args.in_dir}/*/*.wav", recursive=True) # [:10]
88
  shuffle(filenames)
89
- multiprocessing.set_start_method("spawn", force=True)
90
 
91
  num_processes = 1
92
  chunk_size = int(math.ceil(len(filenames) / num_processes))
93
- chunks = [
94
- filenames[i : i + chunk_size] for i in range(0, len(filenames), chunk_size)
95
- ]
96
  print([len(c) for c in chunks])
97
- processes = [
98
- multiprocessing.Process(target=process_batch, args=(chunk,)) for chunk in chunks
99
- ]
100
  for p in processes:
101
  p.start()
 
7
  import torch
8
  from glob import glob
9
  from tqdm import tqdm
 
10
 
11
  import utils
12
  import logging
13
+ logging.getLogger('numba').setLevel(logging.WARNING)
 
14
  import librosa
15
  import numpy as np
16
 
 
24
  wav, sr = librosa.load(filename, sr=sampling_rate)
25
  soft_path = filename + ".soft.pt"
26
  if not os.path.exists(soft_path):
27
+ devive = torch.device("cuda" if torch.cuda.is_available() else "cpu")
28
  wav16k = librosa.resample(wav, orig_sr=sampling_rate, target_sr=16000)
29
+ wav16k = torch.from_numpy(wav16k).to(devive)
30
  c = utils.get_hubert_content(hmodel, wav_16k_tensor=wav16k)
31
  torch.save(c.cpu(), soft_path)
 
32
  f0_path = filename + ".f0.npy"
33
  if not os.path.exists(f0_path):
34
+ f0 = utils.compute_f0_dio(wav, sampling_rate=sampling_rate, hop_length=hop_length)
 
 
35
  np.save(f0_path, f0)
36
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
 
38
  def process_batch(filenames):
39
  print("Loading hubert for content...")
 
46
 
47
  if __name__ == "__main__":
48
  parser = argparse.ArgumentParser()
49
+ parser.add_argument("--in_dir", type=str, default="dataset/44k", help="path to input dir")
 
 
50
 
51
  args = parser.parse_args()
52
+ filenames = glob(f'{args.in_dir}/*/*.wav', recursive=True) # [:10]
53
  shuffle(filenames)
54
+ multiprocessing.set_start_method('spawn',force=True)
55
 
56
  num_processes = 1
57
  chunk_size = int(math.ceil(len(filenames) / num_processes))
58
+ chunks = [filenames[i:i + chunk_size] for i in range(0, len(filenames), chunk_size)]
 
 
59
  print([len(c) for c in chunks])
60
+ processes = [multiprocessing.Process(target=process_batch, args=(chunk,)) for chunk in chunks]
 
 
61
  for p in processes:
62
  p.start()