File size: 2,977 Bytes
abf6bf2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 |
import argparse
import os
import subprocess
import tempfile

import librosa
import numpy as np
import soundfile as sf
from tqdm import tqdm

from lib import dataset
from lib import spec_utils
def parse_args(argv=None):
    """Parse the command-line options of the pitch-shift cache builder.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Returns:
        The ``argparse.Namespace`` with ``sr``, ``hop_length``, ``n_fft``,
        ``pitch``, ``mixtures`` and ``instruments``.
    """
    p = argparse.ArgumentParser()
    p.add_argument('--sr', '-r', type=int, default=44100)
    p.add_argument('--hop_length', '-l', type=int, default=1024)
    p.add_argument('--n_fft', '-f', type=int, default=2048)
    p.add_argument('--pitch', '-p', type=int, default=-1)
    p.add_argument('--mixtures', '-m', required=True)
    p.add_argument('--instruments', '-i', required=True)
    return p.parse_args(argv)


def soundstretch_command(src_path, dst_path, pitch):
    """Return the argv list that runs ``soundstretch`` on one file.

    A list (rather than one formatted string) is required so that the
    command works without a shell on every platform and so that paths
    containing spaces are passed through as single arguments.
    """
    return ['soundstretch', src_path, dst_path, '-pitch={}'.format(pitch)]


def cache_path_for(audio_path, cache_dir, cache_suffix):
    """Return ``<cache_dir>/<audio basename without extension><cache_suffix>``."""
    basename = os.path.splitext(os.path.basename(audio_path))[0]
    return os.path.join(cache_dir, basename + cache_suffix)


def load_wave(path, sr):
    """Load ``path`` with librosa at sample rate ``sr``, keeping all channels."""
    wave, _ = librosa.load(
        path, sr=sr, mono=False, dtype=np.float32, res_type='kaiser_fast')
    return wave


def pitch_shift_pair(inst_wave, residual_wave, sr, pitch):
    """Pitch-shift two waveforms with the external ``soundstretch`` tool.

    Both waveforms are written to WAV files inside a private temporary
    directory, processed by ``soundstretch`` and loaded back. The directory
    is removed even when a step fails, and its unique name keeps concurrent
    runs from overwriting each other's scratch files.

    Args:
        inst_wave: Instrumental waveform as loaded by ``load_wave``.
        residual_wave: Second waveform to shift by the same amount.
        sr: Sample rate used for writing and re-loading the scratch files.
        pitch: Value passed to soundstretch's ``-pitch=`` option.

    Returns:
        Tuple ``(shifted_inst_wave, shifted_residual_wave)``.

    Raises:
        subprocess.CalledProcessError: If ``soundstretch`` exits non-zero.
        FileNotFoundError: If the ``soundstretch`` executable is not found.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        input_i = os.path.join(tmp_dir, 'input_i_{}.wav'.format(pitch))
        input_v = os.path.join(tmp_dir, 'input_v_{}.wav'.format(pitch))
        output_i = os.path.join(tmp_dir, 'output_i_{}.wav'.format(pitch))
        output_v = os.path.join(tmp_dir, 'output_v_{}.wav'.format(pitch))

        # Arrays are transposed before writing, as in the original script
        # (presumably librosa is channel-first and soundfile frames-first
        # -- confirm against the library docs).
        sf.write(input_i, inst_wave.T, sr)
        sf.write(input_v, residual_wave.T, sr)

        for src, dst in ((input_i, output_i), (input_v, output_v)):
            # check_call surfaces a failed conversion here instead of as a
            # confusing load error a few lines later.
            subprocess.check_call(
                soundstretch_command(src, dst, pitch),
                stderr=subprocess.DEVNULL)

        return load_wave(output_i, sr), load_wave(output_v, sr)


def main(argv=None):
    """Build pitch-shifted spectrogram caches for every mixture/instrument pair.

    For each pair returned by ``dataset.make_pair`` the script loads both
    files, aligns them with ``spec_utils.align_wave_head_and_tail``, splits
    the mixture into the instrumental and the residual (mixture minus
    instrumental), pitch-shifts both parts, re-mixes them and stores the
    spectrograms of the new mixture and the shifted instrumental as ``.npy``
    files under ``<mixtures>/sr*_hl*_nf*/`` and ``<instruments>/sr*_hl*_nf*/``.
    Pairs whose two cache files already exist are skipped.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    args = parse_args(argv)

    cache_suffix = '_pitch{}.npy'.format(args.pitch)
    cache_dir = 'sr{}_hl{}_nf{}'.format(args.sr, args.hop_length, args.n_fft)
    mix_cache_dir = os.path.join(args.mixtures, cache_dir)
    inst_cache_dir = os.path.join(args.instruments, cache_dir)
    os.makedirs(mix_cache_dir, exist_ok=True)
    os.makedirs(inst_cache_dir, exist_ok=True)

    filelist = dataset.make_pair(args.mixtures, args.instruments)
    for mix_path, inst_path in tqdm(filelist):
        mix_cache_path = cache_path_for(mix_path, mix_cache_dir, cache_suffix)
        inst_cache_path = cache_path_for(
            inst_path, inst_cache_dir, cache_suffix)

        # Both caches must exist; a half-written pair is regenerated.
        if os.path.exists(mix_cache_path) and os.path.exists(inst_cache_path):
            continue

        X = load_wave(mix_path, args.sr)
        y = load_wave(inst_path, args.sr)
        X, y = spec_utils.align_wave_head_and_tail(X, y, args.sr)
        v = X - y  # residual of the mixture (presumably the vocal part)

        y, v = pitch_shift_pair(y, v, args.sr, args.pitch)
        X = y + v  # re-mix the shifted parts into the shifted mixture

        spec = spec_utils.wave_to_spectrogram(X, args.hop_length, args.n_fft)
        np.save(mix_cache_path, spec)

        spec = spec_utils.wave_to_spectrogram(y, args.hop_length, args.n_fft)
        np.save(inst_cache_path, spec)


if __name__ == '__main__':
    main()
|