Spaces:
Runtime error
Runtime error
import argparse | |
import glob | |
import importlib | |
import os | |
import subprocess | |
from utils.commons.hparams import set_hparams, hparams | |
from utils.commons.multiprocess_utils import multiprocess_run_tqdm | |
from utils.os_utils import remove_file | |
from utils.text.encoding import get_encoding | |
def process_item(idx, txt_fn): | |
base_fn = os.path.splitext(txt_fn)[0] | |
basename = os.path.basename(base_fn) | |
if os.path.exists(base_fn + '.wav'): | |
wav_fn = base_fn + '.wav' | |
elif os.path.exists(base_fn + '.mp3'): | |
wav_fn = base_fn + '.mp3' | |
else: | |
return | |
# process text | |
encoding = get_encoding(txt_fn) | |
with open(txt_fn, encoding=encoding) as f: | |
txt_raw = " ".join(f.readlines()).strip() | |
phs, _, phs_for_align, _ = preprocesser.process_text(txt_processor, txt_raw, hparams['preprocess_args']) | |
os.makedirs(f'{mfa_process_dir}/{basename}', exist_ok=True) | |
with open(f'{mfa_process_dir}/{basename}/{basename}.lab', 'w') as f: | |
f.write(phs_for_align) | |
# process wav | |
new_wav_fn = preprocesser.process_wav(basename, wav_fn, mfa_process_dir, preprocess_args) | |
subprocess.check_call(f'cp "{new_wav_fn}" "{mfa_process_dir}/{basename}/{basename}.wav"', shell=True) | |
if __name__ == "__main__": | |
set_hparams() | |
parser = argparse.ArgumentParser(description='') | |
parser.add_argument('--input_dir', type=str, default='', help='input dir') | |
args, unknown = parser.parse_known_args() | |
input_dir = args.input_dir | |
processed_data_dir = hparams['processed_data_dir'] | |
preprocess_args = hparams['preprocess_args'] | |
preprocess_args['sox_to_wav'] = True | |
preprocess_args['trim_all_sil'] = True | |
# preprocess_args['trim_sil'] = True | |
# preprocess_args['denoise'] = True | |
pkg = ".".join(hparams["preprocess_cls"].split(".")[:-1]) | |
cls_name = hparams["preprocess_cls"].split(".")[-1] | |
process_cls = getattr(importlib.import_module(pkg), cls_name) | |
preprocesser = process_cls() | |
txt_processor = preprocesser.txt_processor | |
num_workers = int(os.getenv('N_PROC', os.cpu_count())) | |
mfa_process_dir = f'{input_dir}/mfa_inputs' | |
remove_file(mfa_process_dir, f'{input_dir}/mfa_tmp') | |
os.makedirs(mfa_process_dir, exist_ok=True) | |
os.makedirs(f'{mfa_process_dir}/processed_tmp', exist_ok=True) | |
for res in multiprocess_run_tqdm( | |
process_item, list(enumerate(glob.glob(f'{input_dir}/*.txt')))): | |
pass | |
remove_file(f'{mfa_process_dir}/processed_tmp') | |
subprocess.check_call( | |
f'mfa align {mfa_process_dir} ' # process dir | |
f'{hparams["processed_data_dir"]}/mfa_dict.txt ' # dict | |
f'{input_dir}/mfa_model.zip ' # model | |
f'{input_dir}/mfa_outputs -t {input_dir}/mfa_tmp -j {num_workers} ' | |
f' && cp -rf {input_dir}/mfa_outputs/*/* {input_dir}/' | |
f' && cp -rf {mfa_process_dir}/*/* {input_dir}/' | |
f' && rm -rf {input_dir}/mfa_tmp {input_dir}/mfa_outputs {mfa_process_dir}', # remove tmp dir | |
shell=True) | |