Spaces:
Running
Running
import torch | |
from tqdm import tqdm | |
from multiprocessing import Pool | |
from mel_processing import spectrogram_torch, mel_spectrogram_torch | |
from utils import load_wav_to_torch | |
# Restrict torch to a single intra-op CPU thread in this process.
# NOTE(review): presumably this keeps the worker processes started further
# down from oversubscribing the CPU cores - confirm.
torch.set_num_threads(1)
class AudioProcessor:
    """Compute a spectrogram for an audio file and cache it on disk.

    Depending on ``use_mel_spec_posterior`` the spectrogram is produced by
    ``mel_spectrogram_torch`` or by ``spectrogram_torch``. The result is
    written with ``torch.save`` to a file derived from the audio path
    (``.mel.pt`` or ``.spec.pt``) and loaded from there on later calls.
    """

    def __init__(
        self,
        max_wav_value,
        use_mel_spec_posterior,
        filter_length,
        n_mel_channels,
        sampling_rate,
        hop_length,
        win_length,
        mel_fmin,
        mel_fmax,
    ):
        """Store the settings used by :meth:`process_audio`.

        Args:
            max_wav_value: Divisor applied to the loaded samples.
            use_mel_spec_posterior: When true, compute and cache a mel
                spectrogram (``.mel.pt``); otherwise a linear spectrogram
                (``.spec.pt``).
            filter_length: Forwarded to both spectrogram functions.
            n_mel_channels: Forwarded to ``mel_spectrogram_torch`` only.
            sampling_rate: Forwarded to both spectrogram functions.
            hop_length: Forwarded to both spectrogram functions.
            win_length: Forwarded to both spectrogram functions.
            mel_fmin: Forwarded to ``mel_spectrogram_torch`` only.
            mel_fmax: Forwarded to ``mel_spectrogram_torch`` only.
        """
        self.max_wav_value = max_wav_value
        self.use_mel_spec_posterior = use_mel_spec_posterior
        self.filter_length = filter_length
        self.n_mel_channels = n_mel_channels
        self.sampling_rate = sampling_rate
        self.hop_length = hop_length
        self.win_length = win_length
        self.mel_fmin = mel_fmin
        self.mel_fmax = mel_fmax

    def _spec_path(self, filename):
        """Return the cache file path that belongs to *filename*."""
        wav_ext = ".wav"
        cache_ext = ".mel.pt" if self.use_mel_spec_posterior else ".spec.pt"
        # Swap only a trailing ".wav". Replacing every occurrence would also
        # rewrite directory names, and a name without ".wav" would map onto
        # itself, making torch.save overwrite the source audio file.
        if filename.endswith(wav_ext):
            stem = filename[: -len(wav_ext)]
        else:
            stem = filename
        return stem + cache_ext

    def _compute_spectrogram(self, audio_norm):
        """Run the configured spectrogram function on *audio_norm*."""
        if self.use_mel_spec_posterior:
            return mel_spectrogram_torch(
                audio_norm,
                self.filter_length,
                self.n_mel_channels,
                self.sampling_rate,
                self.hop_length,
                self.win_length,
                self.mel_fmin,
                self.mel_fmax,
                center=False,
            )
        return spectrogram_torch(
            audio_norm,
            self.filter_length,
            self.sampling_rate,
            self.hop_length,
            self.win_length,
            center=False,
        )

    def process_audio(self, filename):
        """Load *filename* and return its spectrogram and normalised audio.

        The cached spectrogram is used when it can be loaded; otherwise the
        spectrogram is computed, squeezed along dimension 0 and saved to the
        cache path.

        Args:
            filename: Path of the audio file to process.

        Returns:
            A ``(spec, audio_norm)`` tuple, where ``audio_norm`` is the
            loaded audio divided by ``max_wav_value`` with a leading
            dimension added.
        """
        # NOTE(review): the file's own sampling rate is returned here but is
        # never compared with self.sampling_rate - confirm that is intended.
        audio, _file_sampling_rate = load_wav_to_torch(filename)
        audio_norm = (audio / self.max_wav_value).unsqueeze(0)
        spec_filename = self._spec_path(filename)
        try:
            spec = torch.load(spec_filename)
        except Exception:
            # Missing or unreadable cache: rebuild it. Catching Exception
            # rather than using a bare except lets KeyboardInterrupt and
            # SystemExit propagate.
            spec = torch.squeeze(self._compute_spectrogram(audio_norm), 0)
            torch.save(spec, spec_filename)
        return spec, audio_norm
# Usage example: compute and cache the spectrogram of every audio file named
# in the training file list.
processor = AudioProcessor(
    max_wav_value=32768.0,
    use_mel_spec_posterior=False,
    filter_length=2048,
    n_mel_channels=160,
    sampling_rate=44100,
    hop_length=512,
    win_length=2048,
    mel_fmin=0.0,
    # None instead of the string "null".
    # NOTE(review): the string looks like a JSON null copied verbatim; it is
    # only read when use_mel_spec_posterior is true - confirm.
    mel_fmax=None,
)

# Guard the work so that worker processes which re-import this module (the
# "spawn" start method) do not each try to start a pool of their own.
if __name__ == "__main__":
    with open("filelists/train.list", "r", encoding="utf-8") as f:
        filepaths = []
        for line in f:
            line = line.rstrip("\r\n")
            if line:  # skip blank lines
                # The first "|"-separated field of each line is the audio path.
                filepaths.append(line.split("|")[0])

    # Process the files in parallel using 32 worker processes.
    with Pool(processes=32) as pool:
        with tqdm(total=len(filepaths)) as pbar:
            for _ in pool.imap_unordered(processor.process_audio, filepaths):
                pbar.update()