RayeRen's picture
init
d1b91e7
raw
history blame
30 kB
import atexit
import sys
import os
import time
import argparse
from datetime import datetime
import multiprocessing as mp
from montreal_forced_aligner import __version__
from montreal_forced_aligner.utils import get_available_acoustic_languages, get_available_g2p_languages, \
get_available_dict_languages, get_available_lm_languages, get_available_ivector_languages
from montreal_forced_aligner.command_line.align import run_align_corpus
from mfa_usr.adapt import run_adapt_model
from montreal_forced_aligner.command_line.train_and_align import run_train_corpus
from montreal_forced_aligner.command_line.g2p import run_g2p
from montreal_forced_aligner.command_line.train_g2p import run_train_g2p
from montreal_forced_aligner.command_line.validate import run_validate_corpus
from montreal_forced_aligner.command_line.download import run_download
from montreal_forced_aligner.command_line.train_lm import run_train_lm
from montreal_forced_aligner.command_line.thirdparty import run_thirdparty
from montreal_forced_aligner.command_line.train_ivector_extractor import run_train_ivector_extractor
from montreal_forced_aligner.command_line.classify_speakers import run_classify_speakers
from montreal_forced_aligner.command_line.transcribe import run_transcribe_corpus
from montreal_forced_aligner.command_line.train_dictionary import run_train_dictionary
from montreal_forced_aligner.command_line.create_segments import run_create_segments
from montreal_forced_aligner.exceptions import MFAError
from montreal_forced_aligner.config import update_global_config, load_global_config, update_command_history, \
load_command_history
class ExitHooks(object):
def __init__(self):
self.exit_code = None
self.exception = None
def hook(self):
self._orig_exit = sys.exit
sys.exit = self.exit
sys.excepthook = self.exc_handler
def exit(self, code=0):
self.exit_code = code
self._orig_exit(code)
def exc_handler(self, exc_type, exc, *args):
self.exception = exc
hooks = ExitHooks()
hooks.hook()
BEGIN = time.time()
BEGIN_DATE = datetime.now()
def history_save_handler():
history_data = {
'command': ' '.join(sys.argv),
'execution_time': time.time() - BEGIN,
'date': BEGIN_DATE,
'version': __version__
}
if hooks.exit_code is not None:
history_data['exit_code'] = hooks.exit_code
history_data['exception'] = ''
elif hooks.exception is not None:
history_data['exit_code'] = 1
history_data['exception'] = hooks.exception
else:
history_data['exception'] = ''
history_data['exit_code'] = 0
update_command_history(history_data)
atexit.register(history_save_handler)
def fix_path():
from montreal_forced_aligner.config import TEMP_DIR
thirdparty_dir = os.path.join(TEMP_DIR, 'thirdparty', 'bin')
old_path = os.environ.get('PATH', '')
if sys.platform == 'win32':
os.environ['PATH'] = thirdparty_dir + ';' + old_path
else:
os.environ['PATH'] = thirdparty_dir + ':' + old_path
os.environ['LD_LIBRARY_PATH'] = thirdparty_dir + ':' + os.environ.get('LD_LIBRARY_PATH', '')
def unfix_path():
if sys.platform == 'win32':
sep = ';'
os.environ['PATH'] = sep.join(os.environ['PATH'].split(sep)[1:])
else:
sep = ':'
os.environ['PATH'] = sep.join(os.environ['PATH'].split(sep)[1:])
os.environ['LD_LIBRARY_PATH'] = sep.join(os.environ['PATH'].split(sep)[1:])
acoustic_languages = get_available_acoustic_languages()
ivector_languages = get_available_ivector_languages()
lm_languages = get_available_lm_languages()
g2p_languages = get_available_g2p_languages()
dict_languages = get_available_dict_languages()
def create_parser():
GLOBAL_CONFIG = load_global_config()
def add_global_options(subparser, textgrid_output=False):
subparser.add_argument('-t', '--temp_directory', type=str, default=GLOBAL_CONFIG['temp_directory'],
help=f"Temporary directory root to store MFA created files, default is {GLOBAL_CONFIG['temp_directory']}")
subparser.add_argument('--disable_mp',
help=f"Disable any multiprocessing during alignment (not recommended), default is {not GLOBAL_CONFIG['use_mp']}",
action='store_true',
default=not GLOBAL_CONFIG['use_mp'])
subparser.add_argument('-j', '--num_jobs', type=int, default=GLOBAL_CONFIG['num_jobs'],
help=f"Number of data splits (and cores to use if multiprocessing is enabled), defaults "
f"is {GLOBAL_CONFIG['num_jobs']}")
subparser.add_argument('-v', '--verbose', help=f"Output debug messages, default is {GLOBAL_CONFIG['verbose']}",
action='store_true',
default=GLOBAL_CONFIG['verbose'])
subparser.add_argument('--clean', help=f"Remove files from previous runs, default is {GLOBAL_CONFIG['clean']}",
action='store_true',
default=GLOBAL_CONFIG['clean'])
subparser.add_argument('--overwrite',
help=f"Overwrite output files when they exist, default is {GLOBAL_CONFIG['overwrite']}",
action='store_true',
default=GLOBAL_CONFIG['overwrite'])
subparser.add_argument('--debug',
help=f"Run extra steps for debugging issues, default is {GLOBAL_CONFIG['debug']}",
action='store_true',
default=GLOBAL_CONFIG['debug'])
if textgrid_output:
subparser.add_argument('--disable_textgrid_cleanup',
help=f"Disable extra clean up steps on TextGrid output, default is {not GLOBAL_CONFIG['cleanup_textgrids']}",
action='store_true',
default=not GLOBAL_CONFIG['cleanup_textgrids'])
parser = argparse.ArgumentParser()
subparsers = parser.add_subparsers(dest="subcommand")
subparsers.required = True
version_parser = subparsers.add_parser('version')
align_parser = subparsers.add_parser('align')
align_parser.add_argument('corpus_directory', help="Full path to the directory to align")
align_parser.add_argument('dictionary_path', help="Full path to the pronunciation dictionary to use")
align_parser.add_argument('acoustic_model_path',
help=f"Full path to the archive containing pre-trained model or language ({', '.join(acoustic_languages)})")
align_parser.add_argument('output_directory',
help="Full path to output directory, will be created if it doesn't exist")
align_parser.add_argument('--config_path', type=str, default='',
help="Path to config file to use for alignment")
align_parser.add_argument('-s', '--speaker_characters', type=str, default='0',
help="Number of characters of file names to use for determining speaker, "
'default is to use directory names')
align_parser.add_argument('-a', '--audio_directory', type=str, default='',
help="Audio directory root to use for finding audio files")
add_global_options(align_parser, textgrid_output=True)
adapt_parser = subparsers.add_parser('adapt')
adapt_parser.add_argument('corpus_directory', help="Full path to the directory to align")
adapt_parser.add_argument('dictionary_path', help="Full path to the pronunciation dictionary to use")
adapt_parser.add_argument('acoustic_model_path',
help=f"Full path to the archive containing pre-trained model or language ({', '.join(acoustic_languages)})")
adapt_parser.add_argument('output_model_path',
help="Full path to save adapted_model")
adapt_parser.add_argument('output_directory',
help="Full path to output directory, will be created if it doesn't exist")
adapt_parser.add_argument('--config_path', type=str, default='',
help="Path to config file to use for alignment")
adapt_parser.add_argument('-s', '--speaker_characters', type=str, default='0',
help="Number of characters of file names to use for determining speaker, "
'default is to use directory names')
adapt_parser.add_argument('-a', '--audio_directory', type=str, default='',
help="Audio directory root to use for finding audio files")
add_global_options(adapt_parser, textgrid_output=True)
train_parser = subparsers.add_parser('train')
train_parser.add_argument('corpus_directory', help="Full path to the source directory to align")
train_parser.add_argument('dictionary_path', help="Full path to the pronunciation dictionary to use",
default='')
train_parser.add_argument('output_directory',
help="Full path to output directory, will be created if it doesn't exist")
train_parser.add_argument('--config_path', type=str, default='',
help="Path to config file to use for training and alignment")
train_parser.add_argument('-o', '--output_model_path', type=str, default='',
help="Full path to save resulting acoustic and dictionary model")
train_parser.add_argument('-s', '--speaker_characters', type=str, default='0',
help="Number of characters of filenames to use for determining speaker, "
'default is to use directory names')
train_parser.add_argument('-a', '--audio_directory', type=str, default='',
help="Audio directory root to use for finding audio files")
train_parser.add_argument('-m', '--acoustic_model_path', type=str, default='',
help="Full path to save adapted_model")
add_global_options(train_parser, textgrid_output=True)
validate_parser = subparsers.add_parser('validate')
validate_parser.add_argument('corpus_directory', help="Full path to the source directory to align")
validate_parser.add_argument('dictionary_path', help="Full path to the pronunciation dictionary to use",
default='')
validate_parser.add_argument('acoustic_model_path', nargs='?', default='',
help=f"Full path to the archive containing pre-trained model or language ({', '.join(acoustic_languages)})")
validate_parser.add_argument('-s', '--speaker_characters', type=str, default='0',
help="Number of characters of file names to use for determining speaker, "
'default is to use directory names')
validate_parser.add_argument('--test_transcriptions', help="Test accuracy of transcriptions", action='store_true')
validate_parser.add_argument('--ignore_acoustics',
help="Skip acoustic feature generation and associated validation",
action='store_true')
add_global_options(validate_parser)
g2p_model_help_message = f'''Full path to the archive containing pre-trained model or language ({', '.join(g2p_languages)})
If not specified, then orthographic transcription is split into pronunciations.'''
g2p_parser = subparsers.add_parser('g2p')
g2p_parser.add_argument("g2p_model_path", help=g2p_model_help_message, nargs='?')
g2p_parser.add_argument("input_path",
help="Corpus to base word list on or a text file of words to generate pronunciations")
g2p_parser.add_argument("output_path", help="Path to save output dictionary")
g2p_parser.add_argument('--include_bracketed', help="Included words enclosed by brackets, i.e. [...], (...), <...>",
action='store_true')
g2p_parser.add_argument('--config_path', type=str, default='',
help="Path to config file to use for G2P")
add_global_options(g2p_parser)
train_g2p_parser = subparsers.add_parser('train_g2p')
train_g2p_parser.add_argument("dictionary_path", help="Location of existing dictionary")
train_g2p_parser.add_argument("output_model_path", help="Desired location of generated model")
train_g2p_parser.add_argument('--config_path', type=str, default='',
help="Path to config file to use for G2P")
train_g2p_parser.add_argument("--validate", action='store_true',
help="Perform an analysis of accuracy training on "
"most of the data and validating on an unseen subset")
add_global_options(train_g2p_parser)
download_parser = subparsers.add_parser('download')
download_parser.add_argument("model_type",
help="Type of model to download, one of 'acoustic', 'g2p', or 'dictionary'")
download_parser.add_argument("language", help="Name of language code to download, if not specified, "
"will list all available languages", nargs='?')
train_lm_parser = subparsers.add_parser('train_lm')
train_lm_parser.add_argument('source_path', help="Full path to the source directory to train from, alternatively "
'an ARPA format language model to convert for MFA use')
train_lm_parser.add_argument('output_model_path', type=str,
help="Full path to save resulting language model")
train_lm_parser.add_argument('-m', '--model_path', type=str,
help="Full path to existing language model to merge probabilities")
train_lm_parser.add_argument('-w', '--model_weight', type=float, default=1.0,
help="Weight factor for supplemental language model, defaults to 1.0")
train_lm_parser.add_argument('--dictionary_path', help="Full path to the pronunciation dictionary to use",
default='')
train_lm_parser.add_argument('--config_path', type=str, default='',
help="Path to config file to use for training and alignment")
add_global_options(train_lm_parser)
train_dictionary_parser = subparsers.add_parser('train_dictionary')
train_dictionary_parser.add_argument('corpus_directory', help="Full path to the directory to align")
train_dictionary_parser.add_argument('dictionary_path', help="Full path to the pronunciation dictionary to use")
train_dictionary_parser.add_argument('acoustic_model_path',
help=f"Full path to the archive containing pre-trained model or language ({', '.join(acoustic_languages)})")
train_dictionary_parser.add_argument('output_directory',
help="Full path to output directory, will be created if it doesn't exist")
train_dictionary_parser.add_argument('--config_path', type=str, default='',
help="Path to config file to use for alignment")
train_dictionary_parser.add_argument('-s', '--speaker_characters', type=str, default='0',
help="Number of characters of file names to use for determining speaker, "
'default is to use directory names')
add_global_options(train_dictionary_parser)
train_ivector_parser = subparsers.add_parser('train_ivector')
train_ivector_parser.add_argument('corpus_directory', help="Full path to the source directory to "
'train the ivector extractor')
train_ivector_parser.add_argument('dictionary_path', help="Full path to the pronunciation dictionary to use")
train_ivector_parser.add_argument('acoustic_model_path', type=str, default='',
help="Full path to acoustic model for alignment")
train_ivector_parser.add_argument('output_model_path', type=str, default='',
help="Full path to save resulting ivector extractor")
train_ivector_parser.add_argument('-s', '--speaker_characters', type=str, default='0',
help="Number of characters of filenames to use for determining speaker, "
'default is to use directory names')
train_ivector_parser.add_argument('--config_path', type=str, default='',
help="Path to config file to use for training")
add_global_options(train_ivector_parser)
classify_speakers_parser = subparsers.add_parser('classify_speakers')
classify_speakers_parser.add_argument('corpus_directory', help="Full path to the source directory to "
'run speaker classification')
classify_speakers_parser.add_argument('ivector_extractor_path', type=str, default='',
help="Full path to ivector extractor model")
classify_speakers_parser.add_argument('output_directory',
help="Full path to output directory, will be created if it doesn't exist")
classify_speakers_parser.add_argument('-s', '--num_speakers', type=int, default=0,
help="Number of speakers if known")
classify_speakers_parser.add_argument('--cluster', help="Using clustering instead of classification",
action='store_true')
classify_speakers_parser.add_argument('--config_path', type=str, default='',
help="Path to config file to use for ivector extraction")
add_global_options(classify_speakers_parser)
create_segments_parser = subparsers.add_parser('create_segments')
create_segments_parser.add_argument('corpus_directory', help="Full path to the source directory to "
'run VAD segmentation')
create_segments_parser.add_argument('output_directory',
help="Full path to output directory, will be created if it doesn't exist")
create_segments_parser.add_argument('--config_path', type=str, default='',
help="Path to config file to use for segmentation")
add_global_options(create_segments_parser)
transcribe_parser = subparsers.add_parser('transcribe')
transcribe_parser.add_argument('corpus_directory', help="Full path to the directory to transcribe")
transcribe_parser.add_argument('dictionary_path', help="Full path to the pronunciation dictionary to use")
transcribe_parser.add_argument('acoustic_model_path',
help=f"Full path to the archive containing pre-trained model or language ({', '.join(acoustic_languages)})")
transcribe_parser.add_argument('language_model_path',
help=f"Full path to the archive containing pre-trained model or language ({', '.join(lm_languages)})")
transcribe_parser.add_argument('output_directory',
help="Full path to output directory, will be created if it doesn't exist")
transcribe_parser.add_argument('--config_path', type=str, default='',
help="Path to config file to use for transcription")
transcribe_parser.add_argument('-s', '--speaker_characters', type=str, default='0',
help="Number of characters of file names to use for determining speaker, "
'default is to use directory names')
transcribe_parser.add_argument('-a', '--audio_directory', type=str, default='',
help="Audio directory root to use for finding audio files")
transcribe_parser.add_argument('-e', '--evaluate', help="Evaluate the transcription "
"against golden texts", action='store_true')
add_global_options(transcribe_parser)
config_parser = subparsers.add_parser('configure',
help="The configure command is used to set global defaults for MFA so "
"you don't have to set them every time you call an MFA command.")
config_parser.add_argument('-t', '--temp_directory', type=str, default='',
help=f"Set the default temporary directory, default is {GLOBAL_CONFIG['temp_directory']}")
config_parser.add_argument('-j', '--num_jobs', type=int,
help=f"Set the number of processes to use by default, defaults to {GLOBAL_CONFIG['num_jobs']}")
config_parser.add_argument('--always_clean', help="Always remove files from previous runs by default",
action='store_true')
config_parser.add_argument('--never_clean', help="Don't remove files from previous runs by default",
action='store_true')
config_parser.add_argument('--always_verbose', help="Default to verbose output", action='store_true')
config_parser.add_argument('--never_verbose', help="Default to non-verbose output", action='store_true')
config_parser.add_argument('--always_debug', help="Default to running debugging steps", action='store_true')
config_parser.add_argument('--never_debug', help="Default to not running debugging steps", action='store_true')
config_parser.add_argument('--always_overwrite', help="Always overwrite output files", action='store_true')
config_parser.add_argument('--never_overwrite', help="Never overwrite output files (if file already exists, "
"the output will be saved in the temp directory)",
action='store_true')
config_parser.add_argument('--disable_mp', help="Disable all multiprocessing (not recommended as it will usually "
"increase processing times)", action='store_true')
config_parser.add_argument('--enable_mp', help="Enable multiprocessing (recommended and enabled by default)",
action='store_true')
config_parser.add_argument('--disable_textgrid_cleanup', help="Disable postprocessing of TextGrids that cleans up "
"silences and recombines compound words and clitics",
action='store_true')
config_parser.add_argument('--enable_textgrid_cleanup', help="Enable postprocessing of TextGrids that cleans up "
"silences and recombines compound words and clitics",
action='store_true')
history_parser = subparsers.add_parser('history')
history_parser.add_argument('depth', help='Number of commands to list', nargs='?', default=10)
history_parser.add_argument('--verbose', help="Flag for whether to output additional information",
action='store_true')
annotator_parser = subparsers.add_parser('annotator')
anchor_parser = subparsers.add_parser('anchor')
thirdparty_parser = subparsers.add_parser('thirdparty')
thirdparty_parser.add_argument("command",
help="One of 'download', 'validate', or 'kaldi'")
thirdparty_parser.add_argument('local_directory',
help="Full path to the built executables to collect", nargs="?",
default='')
return parser
parser = create_parser()
def main():
parser = create_parser()
mp.freeze_support()
args, unknown = parser.parse_known_args()
for short in ['-c', '-d']:
if short in unknown:
print(f'Due to the number of options that `{short}` could refer to, it is not accepted. '
'Please specify the full argument')
sys.exit(1)
try:
fix_path()
if args.subcommand in ['align', 'train', 'train_ivector']:
from montreal_forced_aligner.thirdparty.kaldi import validate_alignment_binaries
if not validate_alignment_binaries():
print("There was an issue validating Kaldi binaries, please ensure you've downloaded them via the "
"'mfa thirdparty download' command. See 'mfa thirdparty validate' for more detailed information "
"on why this check failed.")
sys.exit(1)
elif args.subcommand in ['transcribe']:
from montreal_forced_aligner.thirdparty.kaldi import validate_transcribe_binaries
if not validate_transcribe_binaries():
print("There was an issue validating Kaldi binaries, please ensure you've downloaded them via the "
"'mfa thirdparty download' command. See 'mfa thirdparty validate' for more detailed information "
"on why this check failed. If you are on MacOS, please note that the thirdparty binaries available "
"via the download command do not contain the transcription ones. To get this functionality working "
"for the time being, please build kaldi locally and follow the instructions for running the "
"'mfa thirdparty kaldi' command.")
sys.exit(1)
elif args.subcommand in ['train_dictionary']:
from montreal_forced_aligner.thirdparty.kaldi import validate_train_dictionary_binaries
if not validate_train_dictionary_binaries():
print("There was an issue validating Kaldi binaries, please ensure you've downloaded them via the "
"'mfa thirdparty download' command. See 'mfa thirdparty validate' for more detailed information "
"on why this check failed. If you are on MacOS, please note that the thirdparty binaries available "
"via the download command do not contain the train_dictionary ones. To get this functionality working "
"for the time being, please build kaldi locally and follow the instructions for running the "
"'mfa thirdparty kaldi' command.")
sys.exit(1)
elif args.subcommand in ['g2p', 'train_g2p']:
try:
import pynini
except ImportError:
print("There was an issue importing Pynini, please ensure that it is installed. If you are on Windows, "
"please use the Windows Subsystem for Linux to use g2p functionality.")
sys.exit(1)
if args.subcommand == 'align':
run_align_corpus(args, unknown, acoustic_languages)
elif args.subcommand == 'adapt':
run_adapt_model(args, unknown, acoustic_languages)
elif args.subcommand == 'train':
run_train_corpus(args, unknown)
elif args.subcommand == 'g2p':
run_g2p(args, unknown, g2p_languages)
elif args.subcommand == 'train_g2p':
run_train_g2p(args, unknown)
elif args.subcommand == 'validate':
run_validate_corpus(args, unknown)
elif args.subcommand == 'download':
run_download(args)
elif args.subcommand == 'train_lm':
run_train_lm(args, unknown)
elif args.subcommand == 'train_dictionary':
run_train_dictionary(args, unknown)
elif args.subcommand == 'train_ivector':
run_train_ivector_extractor(args, unknown)
elif args.subcommand == 'classify_speakers':
run_classify_speakers(args, unknown)
elif args.subcommand in ['annotator', 'anchor']:
from montreal_forced_aligner.command_line.anchor import run_anchor
run_anchor(args)
elif args.subcommand == 'thirdparty':
run_thirdparty(args)
elif args.subcommand == 'transcribe':
run_transcribe_corpus(args, unknown)
elif args.subcommand == 'create_segments':
run_create_segments(args, unknown)
elif args.subcommand == 'configure':
update_global_config(args)
global GLOBAL_CONFIG
GLOBAL_CONFIG = load_global_config()
elif args.subcommand == 'history':
depth = args.depth
history = load_command_history()[-depth:]
for h in history:
if args.verbose:
print('command\tDate\tExecution time\tVersion\tExit code\tException')
for h in history:
execution_time = time.strftime('%H:%M:%S', time.gmtime(h['execution_time']))
d = h['date'].isoformat()
print(
f"{h['command']}\t{d}\t{execution_time}\t{h['version']}\t{h['exit_code']}\t{h['exception']}")
pass
else:
for h in history:
print(h['command'])
elif args.subcommand == 'version':
print(__version__)
except MFAError as e:
if getattr(args, 'debug', False):
raise
print(e)
sys.exit(1)
finally:
unfix_path()
if __name__ == '__main__':
main()