Spaces:
Running
Running
MarcusSu1216
committed on
Commit
•
e7d77e7
1
Parent(s):
5ec3a59
Update preprocess_hubert_f0.py
Browse files
- preprocess_hubert_f0.py +9 -48
preprocess_hubert_f0.py
CHANGED
@@ -7,12 +7,10 @@ from random import shuffle
|
|
7 |
import torch
|
8 |
from glob import glob
|
9 |
from tqdm import tqdm
|
10 |
-
from modules.mel_processing import spectrogram_torch
|
11 |
|
12 |
import utils
|
13 |
import logging
|
14 |
-
|
15 |
-
logging.getLogger("numba").setLevel(logging.WARNING)
|
16 |
import librosa
|
17 |
import numpy as np
|
18 |
|
@@ -26,47 +24,16 @@ def process_one(filename, hmodel):
|
|
26 |
wav, sr = librosa.load(filename, sr=sampling_rate)
|
27 |
soft_path = filename + ".soft.pt"
|
28 |
if not os.path.exists(soft_path):
|
29 |
-
|
30 |
wav16k = librosa.resample(wav, orig_sr=sampling_rate, target_sr=16000)
|
31 |
-
wav16k = torch.from_numpy(wav16k).to(
|
32 |
c = utils.get_hubert_content(hmodel, wav_16k_tensor=wav16k)
|
33 |
torch.save(c.cpu(), soft_path)
|
34 |
-
|
35 |
f0_path = filename + ".f0.npy"
|
36 |
if not os.path.exists(f0_path):
|
37 |
-
f0 = utils.compute_f0_dio(
|
38 |
-
wav, sampling_rate=sampling_rate, hop_length=hop_length
|
39 |
-
)
|
40 |
np.save(f0_path, f0)
|
41 |
|
42 |
-
spec_path = filename.replace(".wav", ".spec.pt")
|
43 |
-
if not os.path.exists(spec_path):
|
44 |
-
# Process spectrogram
|
45 |
-
# The following code can't be replaced by torch.FloatTensor(wav)
|
46 |
-
# because load_wav_to_torch return a tensor that need to be normalized
|
47 |
-
|
48 |
-
audio, sr = utils.load_wav_to_torch(filename)
|
49 |
-
if sr != hps.data.sampling_rate:
|
50 |
-
raise ValueError(
|
51 |
-
"{} SR doesn't match target {} SR".format(
|
52 |
-
sr, hps.data.sampling_rate
|
53 |
-
)
|
54 |
-
)
|
55 |
-
|
56 |
-
audio_norm = audio / hps.data.max_wav_value
|
57 |
-
audio_norm = audio_norm.unsqueeze(0)
|
58 |
-
|
59 |
-
spec = spectrogram_torch(
|
60 |
-
audio_norm,
|
61 |
-
hps.data.filter_length,
|
62 |
-
hps.data.sampling_rate,
|
63 |
-
hps.data.hop_length,
|
64 |
-
hps.data.win_length,
|
65 |
-
center=False,
|
66 |
-
)
|
67 |
-
spec = torch.squeeze(spec, 0)
|
68 |
-
torch.save(spec, spec_path)
|
69 |
-
|
70 |
|
71 |
def process_batch(filenames):
|
72 |
print("Loading hubert for content...")
|
@@ -79,23 +46,17 @@ def process_batch(filenames):
|
|
79 |
|
80 |
if __name__ == "__main__":
|
81 |
parser = argparse.ArgumentParser()
|
82 |
-
parser.add_argument(
|
83 |
-
"--in_dir", type=str, default="dataset/44k", help="path to input dir"
|
84 |
-
)
|
85 |
|
86 |
args = parser.parse_args()
|
87 |
-
filenames = glob(f
|
88 |
shuffle(filenames)
|
89 |
-
multiprocessing.set_start_method(
|
90 |
|
91 |
num_processes = 1
|
92 |
chunk_size = int(math.ceil(len(filenames) / num_processes))
|
93 |
-
chunks = [
|
94 |
-
filenames[i : i + chunk_size] for i in range(0, len(filenames), chunk_size)
|
95 |
-
]
|
96 |
print([len(c) for c in chunks])
|
97 |
-
processes = [
|
98 |
-
multiprocessing.Process(target=process_batch, args=(chunk,)) for chunk in chunks
|
99 |
-
]
|
100 |
for p in processes:
|
101 |
p.start()
|
|
|
7 |
import torch
|
8 |
from glob import glob
|
9 |
from tqdm import tqdm
|
|
|
10 |
|
11 |
import utils
|
12 |
import logging
|
13 |
+
logging.getLogger('numba').setLevel(logging.WARNING)
|
|
|
14 |
import librosa
|
15 |
import numpy as np
|
16 |
|
|
|
24 |
wav, sr = librosa.load(filename, sr=sampling_rate)
|
25 |
soft_path = filename + ".soft.pt"
|
26 |
if not os.path.exists(soft_path):
|
27 |
+
devive = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
28 |
wav16k = librosa.resample(wav, orig_sr=sampling_rate, target_sr=16000)
|
29 |
+
wav16k = torch.from_numpy(wav16k).to(devive)
|
30 |
c = utils.get_hubert_content(hmodel, wav_16k_tensor=wav16k)
|
31 |
torch.save(c.cpu(), soft_path)
|
|
|
32 |
f0_path = filename + ".f0.npy"
|
33 |
if not os.path.exists(f0_path):
|
34 |
+
f0 = utils.compute_f0_dio(wav, sampling_rate=sampling_rate, hop_length=hop_length)
|
|
|
|
|
35 |
np.save(f0_path, f0)
|
36 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
|
38 |
def process_batch(filenames):
|
39 |
print("Loading hubert for content...")
|
|
|
46 |
|
47 |
if __name__ == "__main__":
|
48 |
parser = argparse.ArgumentParser()
|
49 |
+
parser.add_argument("--in_dir", type=str, default="dataset/44k", help="path to input dir")
|
|
|
|
|
50 |
|
51 |
args = parser.parse_args()
|
52 |
+
filenames = glob(f'{args.in_dir}/*/*.wav', recursive=True) # [:10]
|
53 |
shuffle(filenames)
|
54 |
+
multiprocessing.set_start_method('spawn',force=True)
|
55 |
|
56 |
num_processes = 1
|
57 |
chunk_size = int(math.ceil(len(filenames) / num_processes))
|
58 |
+
chunks = [filenames[i:i + chunk_size] for i in range(0, len(filenames), chunk_size)]
|
|
|
|
|
59 |
print([len(c) for c in chunks])
|
60 |
+
processes = [multiprocessing.Process(target=process_batch, args=(chunk,)) for chunk in chunks]
|
|
|
|
|
61 |
for p in processes:
|
62 |
p.start()
|