import atexit
import sys
import os
import time
import argparse
from datetime import datetime
import multiprocessing as mp

from montreal_forced_aligner import __version__
from montreal_forced_aligner.utils import get_available_acoustic_languages, get_available_g2p_languages, \
    get_available_dict_languages, get_available_lm_languages, get_available_ivector_languages
from montreal_forced_aligner.command_line.align import run_align_corpus
from mfa_usr.adapt import run_adapt_model
from montreal_forced_aligner.command_line.train_and_align import run_train_corpus
from montreal_forced_aligner.command_line.g2p import run_g2p
from montreal_forced_aligner.command_line.train_g2p import run_train_g2p
from montreal_forced_aligner.command_line.validate import run_validate_corpus
from montreal_forced_aligner.command_line.download import run_download
from montreal_forced_aligner.command_line.train_lm import run_train_lm
from montreal_forced_aligner.command_line.thirdparty import run_thirdparty
from montreal_forced_aligner.command_line.train_ivector_extractor import run_train_ivector_extractor
from montreal_forced_aligner.command_line.classify_speakers import run_classify_speakers
from montreal_forced_aligner.command_line.transcribe import run_transcribe_corpus
from montreal_forced_aligner.command_line.train_dictionary import run_train_dictionary
from montreal_forced_aligner.command_line.create_segments import run_create_segments
from montreal_forced_aligner.exceptions import MFAError
from montreal_forced_aligner.config import update_global_config, load_global_config, update_command_history, \
    load_command_history


class ExitHooks(object):
    """Record how the interpreter terminates.

    After :meth:`hook` is called, ``sys.exit`` and ``sys.excepthook`` are
    wrapped so that the exit code passed to ``sys.exit`` and any uncaught
    exception are remembered on this object.

    Attributes are documented inline in ``__init__``.
    """

    def __init__(self):
        # Code passed to sys.exit(), or None if sys.exit() was never called.
        self.exit_code = None
        # Uncaught exception instance seen by the excepthook, or None.
        self.exception = None
        # Originals are captured here so exit()/exc_handler() are usable even
        # before hook() has been called; hook() refreshes them.
        self._orig_exit = sys.exit
        self._orig_excepthook = sys.excepthook
        self._hooked = False

    def hook(self):
        """Install the wrappers over ``sys.exit`` and ``sys.excepthook``."""
        if self._hooked:
            # Hooking twice would store our own wrappers as the "originals"
            # and make exit() recurse forever.
            return
        self._orig_exit = sys.exit
        self._orig_excepthook = sys.excepthook
        sys.exit = self.exit
        sys.excepthook = self.exc_handler
        self._hooked = True

    def exit(self, code=0):
        """Remember ``code`` and delegate to the original ``sys.exit``."""
        self.exit_code = code
        self._orig_exit(code)

    def exc_handler(self, exc_type, exc, *args):
        """Remember the uncaught exception, then report it as usual.

        Delegating to the previously installed hook keeps the traceback
        visible; without it every uncaught exception would be swallowed
        silently.
        """
        self.exception = exc
        self._orig_excepthook(exc_type, exc, *args)


hooks = ExitHooks()
hooks.hook()

# Wall-clock start of the process, used to compute the execution time.
BEGIN = time.time()
# Timestamp of process start, stored with each command history entry.
BEGIN_DATE = datetime.now()


def history_save_handler():
    """Append a record of the current invocation to the command history.

    Registered with ``atexit``. The record holds the command line, elapsed
    time since ``BEGIN``, the start date, the package version, and the exit
    code / exception captured by ``hooks``.
    """
    history_data = {
        'command': ' '.join(sys.argv),
        'execution_time': time.time() - BEGIN,
        'date': BEGIN_DATE,
        'version': __version__
    }
    if hooks.exit_code is not None:
        # sys.exit() was called explicitly.
        history_data['exit_code'] = hooks.exit_code
        history_data['exception'] = ''
    elif hooks.exception is not None:
        # An uncaught exception terminated the process.
        history_data['exit_code'] = 1
        history_data['exception'] = hooks.exception
    else:
        history_data['exception'] = ''
        history_data['exit_code'] = 0
    update_command_history(history_data)


atexit.register(history_save_handler)


def fix_path():
    """Prepend the thirdparty ``bin`` directory to the search paths.

    ``PATH`` is always updated; on non-Windows platforms ``LD_LIBRARY_PATH``
    is updated as well.
    """
    from montreal_forced_aligner.config import TEMP_DIR
    thirdparty_dir = os.path.join(TEMP_DIR, 'thirdparty', 'bin')
    old_path = os.environ.get('PATH', '')
    if sys.platform == 'win32':
        os.environ['PATH'] = thirdparty_dir + ';' + old_path
    else:
        os.environ['PATH'] = thirdparty_dir + ':' + old_path
        os.environ['LD_LIBRARY_PATH'] = thirdparty_dir + ':' + os.environ.get('LD_LIBRARY_PATH', '')


def _drop_first_path_entry(variable, sep):
    """Remove the first ``sep``-separated entry of environment ``variable``."""
    # .get() keeps this safe when the variable was never set, e.g. when
    # fix_path() failed before touching it and we run from a ``finally``.
    entries = os.environ.get(variable, '').split(sep)
    os.environ[variable] = sep.join(entries[1:])


def unfix_path():
    """Undo :func:`fix_path` by dropping the first entry of each search path.

    Each variable is rebuilt from its *own* value; ``LD_LIBRARY_PATH`` must
    not be derived from ``PATH``.
    """
    if sys.platform == 'win32':
        _drop_first_path_entry('PATH', ';')
    else:
        _drop_first_path_entry('PATH', ':')
        _drop_first_path_entry('LD_LIBRARY_PATH', ':')


acoustic_languages = get_available_acoustic_languages()
ivector_languages = get_available_ivector_languages()
lm_languages = get_available_lm_languages()
g2p_languages = get_available_g2p_languages()
dict_languages = get_available_dict_languages()


def create_parser():
    """Build the top-level ``argparse`` parser with one subparser per command.

    Defaults for the shared options are read from the global configuration
    at the time this function is called.

    Returns
    -------
    argparse.ArgumentParser
        Parser whose ``subcommand`` destination names the chosen command.
    """
    GLOBAL_CONFIG = load_global_config()

    def add_global_options(subparser, textgrid_output=False):
        """Add the options shared by most subcommands to ``subparser``.

        ``textgrid_output`` additionally adds ``--disable_textgrid_cleanup``.
        """
        subparser.add_argument('-t', '--temp_directory', type=str, default=GLOBAL_CONFIG['temp_directory'],
                               help=f"Temporary directory root to store MFA created files, default is {GLOBAL_CONFIG['temp_directory']}")
        subparser.add_argument('--disable_mp',
                               help=f"Disable any multiprocessing during alignment (not recommended), default is {not GLOBAL_CONFIG['use_mp']}",
                               action='store_true',
                               default=not GLOBAL_CONFIG['use_mp'])
        subparser.add_argument('-j', '--num_jobs', type=int, default=GLOBAL_CONFIG['num_jobs'],
                               help=f"Number of data splits (and cores to use if multiprocessing is enabled), defaults "
                                    f"is {GLOBAL_CONFIG['num_jobs']}")
        subparser.add_argument('-v', '--verbose', help=f"Output debug messages, default is {GLOBAL_CONFIG['verbose']}",
                               action='store_true',
                               default=GLOBAL_CONFIG['verbose'])
        subparser.add_argument('--clean',
                               help=f"Remove files from previous runs, default is {GLOBAL_CONFIG['clean']}",
                               action='store_true',
                               default=GLOBAL_CONFIG['clean'])
        subparser.add_argument('--overwrite',
                               help=f"Overwrite output files when they exist, default is {GLOBAL_CONFIG['overwrite']}",
                               action='store_true',
                               default=GLOBAL_CONFIG['overwrite'])
        subparser.add_argument('--debug',
                               help=f"Run extra steps for debugging issues, default is {GLOBAL_CONFIG['debug']}",
                               action='store_true',
                               default=GLOBAL_CONFIG['debug'])
        if textgrid_output:
            subparser.add_argument('--disable_textgrid_cleanup',
                                   help=f"Disable extra clean up steps on TextGrid output, default is {not GLOBAL_CONFIG['cleanup_textgrids']}",
                                   action='store_true',
                                   default=not GLOBAL_CONFIG['cleanup_textgrids'])

    parser = argparse.ArgumentParser()

    subparsers = parser.add_subparsers(dest="subcommand")
    subparsers.required = True

    version_parser = subparsers.add_parser('version')

    align_parser = subparsers.add_parser('align')
    align_parser.add_argument('corpus_directory', help="Full path to the directory to align")
    align_parser.add_argument('dictionary_path', help="Full path to the pronunciation dictionary to use")
    align_parser.add_argument('acoustic_model_path',
                              help=f"Full path to the archive containing pre-trained model or language ({', '.join(acoustic_languages)})")
    align_parser.add_argument('output_directory',
                              help="Full path to output directory, will be created if it doesn't exist")
    align_parser.add_argument('--config_path', type=str, default='',
                              help="Path to config file to use for alignment")
    align_parser.add_argument('-s', '--speaker_characters', type=str, default='0',
                              help="Number of characters of file names to use for determining speaker, "
                                   'default is to use directory names')
    align_parser.add_argument('-a', '--audio_directory', type=str, default='',
                              help="Audio directory root to use for finding audio files")
    add_global_options(align_parser, textgrid_output=True)

    adapt_parser = subparsers.add_parser('adapt')
    adapt_parser.add_argument('corpus_directory', help="Full path to the directory to align")
    adapt_parser.add_argument('dictionary_path', help="Full path to the pronunciation dictionary to use")
    adapt_parser.add_argument('acoustic_model_path',
                              help=f"Full path to the archive containing pre-trained model or language ({', '.join(acoustic_languages)})")
    adapt_parser.add_argument('output_model_path', help="Full path to save adapted_model")
    adapt_parser.add_argument('output_directory',
                              help="Full path to output directory, will be created if it doesn't exist")
    adapt_parser.add_argument('--config_path', type=str, default='',
                              help="Path to config file to use for alignment")
    adapt_parser.add_argument('-s', '--speaker_characters', type=str, default='0',
                              help="Number of characters of file names to use for determining speaker, "
                                   'default is to use directory names')
    adapt_parser.add_argument('-a', '--audio_directory', type=str, default='',
                              help="Audio directory root to use for finding audio files")
    add_global_options(adapt_parser, textgrid_output=True)

    train_parser = subparsers.add_parser('train')
    train_parser.add_argument('corpus_directory', help="Full path to the source directory to align")
    train_parser.add_argument('dictionary_path', help="Full path to the pronunciation dictionary to use",
                              default='')
    train_parser.add_argument('output_directory',
                              help="Full path to output directory, will be created if it doesn't exist")
    train_parser.add_argument('--config_path', type=str, default='',
                              help="Path to config file to use for training and alignment")
    train_parser.add_argument('-o', '--output_model_path', type=str, default='',
                              help="Full path to save resulting acoustic and dictionary model")
    train_parser.add_argument('-s', '--speaker_characters', type=str, default='0',
                              help="Number of characters of filenames to use for determining speaker, "
                                   'default is to use directory names')
    train_parser.add_argument('-a', '--audio_directory', type=str, default='',
                              help="Audio directory root to use for finding audio files")
    train_parser.add_argument('-m', '--acoustic_model_path', type=str, default='',
                              help="Full path to save adapted_model")
    add_global_options(train_parser, textgrid_output=True)

    validate_parser = subparsers.add_parser('validate')
    validate_parser.add_argument('corpus_directory', help="Full path to the source directory to align")
    validate_parser.add_argument('dictionary_path', help="Full path to the pronunciation dictionary to use",
                                 default='')
    validate_parser.add_argument('acoustic_model_path', nargs='?', default='',
                                 help=f"Full path to the archive containing pre-trained model or language ({', '.join(acoustic_languages)})")
    validate_parser.add_argument('-s', '--speaker_characters', type=str, default='0',
                                 help="Number of characters of file names to use for determining speaker, "
                                      'default is to use directory names')
    validate_parser.add_argument('--test_transcriptions', help="Test accuracy of transcriptions",
                                 action='store_true')
    validate_parser.add_argument('--ignore_acoustics',
                                 help="Skip acoustic feature generation and associated validation",
                                 action='store_true')
    add_global_options(validate_parser)

    g2p_model_help_message = f'''Full path to the archive containing pre-trained model or language ({', '.join(g2p_languages)}) If not specified, then orthographic transcription is split into pronunciations.'''
    g2p_parser = subparsers.add_parser('g2p')
    g2p_parser.add_argument("g2p_model_path", help=g2p_model_help_message, nargs='?')
    g2p_parser.add_argument("input_path",
                            help="Corpus to base word list on or a text file of words to generate pronunciations")
    g2p_parser.add_argument("output_path", help="Path to save output dictionary")
    g2p_parser.add_argument('--include_bracketed',
                            help="Included words enclosed by brackets, i.e. [...], (...), <...>",
                            action='store_true')
    g2p_parser.add_argument('--config_path', type=str, default='',
                            help="Path to config file to use for G2P")
    add_global_options(g2p_parser)

    train_g2p_parser = subparsers.add_parser('train_g2p')
    train_g2p_parser.add_argument("dictionary_path", help="Location of existing dictionary")
    train_g2p_parser.add_argument("output_model_path", help="Desired location of generated model")
    train_g2p_parser.add_argument('--config_path', type=str, default='',
                                  help="Path to config file to use for G2P")
    train_g2p_parser.add_argument("--validate", action='store_true',
                                  help="Perform an analysis of accuracy training on "
                                       "most of the data and validating on an unseen subset")
    add_global_options(train_g2p_parser)

    download_parser = subparsers.add_parser('download')
    download_parser.add_argument("model_type",
                                 help="Type of model to download, one of 'acoustic', 'g2p', or 'dictionary'")
    download_parser.add_argument("language", help="Name of language code to download, if not specified, "
                                                  "will list all available languages", nargs='?')

    train_lm_parser = subparsers.add_parser('train_lm')
    train_lm_parser.add_argument('source_path',
                                 help="Full path to the source directory to train from, alternatively "
                                      'an ARPA format language model to convert for MFA use')
    train_lm_parser.add_argument('output_model_path', type=str,
                                 help="Full path to save resulting language model")
    train_lm_parser.add_argument('-m', '--model_path', type=str,
                                 help="Full path to existing language model to merge probabilities")
    train_lm_parser.add_argument('-w', '--model_weight', type=float, default=1.0,
                                 help="Weight factor for supplemental language model, defaults to 1.0")
    train_lm_parser.add_argument('--dictionary_path', help="Full path to the pronunciation dictionary to use",
                                 default='')
    train_lm_parser.add_argument('--config_path', type=str, default='',
                                 help="Path to config file to use for training and alignment")
    add_global_options(train_lm_parser)

    train_dictionary_parser = subparsers.add_parser('train_dictionary')
    train_dictionary_parser.add_argument('corpus_directory', help="Full path to the directory to align")
    train_dictionary_parser.add_argument('dictionary_path',
                                         help="Full path to the pronunciation dictionary to use")
    train_dictionary_parser.add_argument('acoustic_model_path',
                                         help=f"Full path to the archive containing pre-trained model or language ({', '.join(acoustic_languages)})")
    train_dictionary_parser.add_argument('output_directory',
                                         help="Full path to output directory, will be created if it doesn't exist")
    train_dictionary_parser.add_argument('--config_path', type=str, default='',
                                         help="Path to config file to use for alignment")
    train_dictionary_parser.add_argument('-s', '--speaker_characters', type=str, default='0',
                                         help="Number of characters of file names to use for determining speaker, "
                                              'default is to use directory names')
    add_global_options(train_dictionary_parser)

    train_ivector_parser = subparsers.add_parser('train_ivector')
    train_ivector_parser.add_argument('corpus_directory', help="Full path to the source directory to "
                                                               'train the ivector extractor')
    train_ivector_parser.add_argument('dictionary_path',
                                      help="Full path to the pronunciation dictionary to use")
    train_ivector_parser.add_argument('acoustic_model_path', type=str, default='',
                                      help="Full path to acoustic model for alignment")
    train_ivector_parser.add_argument('output_model_path', type=str, default='',
                                      help="Full path to save resulting ivector extractor")
    train_ivector_parser.add_argument('-s', '--speaker_characters', type=str, default='0',
                                      help="Number of characters of filenames to use for determining speaker, "
                                           'default is to use directory names')
    train_ivector_parser.add_argument('--config_path', type=str, default='',
                                      help="Path to config file to use for training")
    add_global_options(train_ivector_parser)

    classify_speakers_parser = subparsers.add_parser('classify_speakers')
    classify_speakers_parser.add_argument('corpus_directory', help="Full path to the source directory to "
                                                                   'run speaker classification')
    classify_speakers_parser.add_argument('ivector_extractor_path', type=str, default='',
                                          help="Full path to ivector extractor model")
    classify_speakers_parser.add_argument('output_directory',
                                          help="Full path to output directory, will be created if it doesn't exist")
    classify_speakers_parser.add_argument('-s', '--num_speakers', type=int, default=0,
                                          help="Number of speakers if known")
    classify_speakers_parser.add_argument('--cluster', help="Using clustering instead of classification",
                                          action='store_true')
    classify_speakers_parser.add_argument('--config_path', type=str, default='',
                                          help="Path to config file to use for ivector extraction")
    add_global_options(classify_speakers_parser)

    create_segments_parser = subparsers.add_parser('create_segments')
    create_segments_parser.add_argument('corpus_directory', help="Full path to the source directory to "
                                                                 'run VAD segmentation')
    create_segments_parser.add_argument('output_directory',
                                        help="Full path to output directory, will be created if it doesn't exist")
    create_segments_parser.add_argument('--config_path', type=str, default='',
                                        help="Path to config file to use for segmentation")
    add_global_options(create_segments_parser)

    transcribe_parser = subparsers.add_parser('transcribe')
    transcribe_parser.add_argument('corpus_directory', help="Full path to the directory to transcribe")
    transcribe_parser.add_argument('dictionary_path', help="Full path to the pronunciation dictionary to use")
    transcribe_parser.add_argument('acoustic_model_path',
                                   help=f"Full path to the archive containing pre-trained model or language ({', '.join(acoustic_languages)})")
    transcribe_parser.add_argument('language_model_path',
                                   help=f"Full path to the archive containing pre-trained model or language ({', '.join(lm_languages)})")
    transcribe_parser.add_argument('output_directory',
                                   help="Full path to output directory, will be created if it doesn't exist")
    transcribe_parser.add_argument('--config_path', type=str, default='',
                                   help="Path to config file to use for transcription")
    transcribe_parser.add_argument('-s', '--speaker_characters', type=str, default='0',
                                   help="Number of characters of file names to use for determining speaker, "
                                        'default is to use directory names')
    transcribe_parser.add_argument('-a', '--audio_directory', type=str, default='',
                                   help="Audio directory root to use for finding audio files")
    transcribe_parser.add_argument('-e', '--evaluate', help="Evaluate the transcription "
                                                            "against golden texts", action='store_true')
    add_global_options(transcribe_parser)

    config_parser = subparsers.add_parser('configure',
                                          help="The configure command is used to set global defaults for MFA so "
                                               "you don't have to set them every time you call an MFA command.")
    config_parser.add_argument('-t', '--temp_directory', type=str, default='',
                               help=f"Set the default temporary directory, default is {GLOBAL_CONFIG['temp_directory']}")
    config_parser.add_argument('-j', '--num_jobs', type=int,
                               help=f"Set the number of processes to use by default, defaults to {GLOBAL_CONFIG['num_jobs']}")
    config_parser.add_argument('--always_clean', help="Always remove files from previous runs by default",
                               action='store_true')
    config_parser.add_argument('--never_clean', help="Don't remove files from previous runs by default",
                               action='store_true')
    config_parser.add_argument('--always_verbose', help="Default to verbose output", action='store_true')
    config_parser.add_argument('--never_verbose', help="Default to non-verbose output", action='store_true')
    config_parser.add_argument('--always_debug', help="Default to running debugging steps",
                               action='store_true')
    config_parser.add_argument('--never_debug', help="Default to not running debugging steps",
                               action='store_true')
    config_parser.add_argument('--always_overwrite', help="Always overwrite output files",
                               action='store_true')
    config_parser.add_argument('--never_overwrite',
                               help="Never overwrite output files (if file already exists, "
                                    "the output will be saved in the temp directory)",
                               action='store_true')
    config_parser.add_argument('--disable_mp',
                               help="Disable all multiprocessing (not recommended as it will usually "
                                    "increase processing times)",
                               action='store_true')
    config_parser.add_argument('--enable_mp',
                               help="Enable multiprocessing (recommended and enabled by default)",
                               action='store_true')
    config_parser.add_argument('--disable_textgrid_cleanup',
                               help="Disable postprocessing of TextGrids that cleans up "
                                    "silences and recombines compound words and clitics",
                               action='store_true')
    config_parser.add_argument('--enable_textgrid_cleanup',
                               help="Enable postprocessing of TextGrids that cleans up "
                                    "silences and recombines compound words and clitics",
                               action='store_true')

    history_parser = subparsers.add_parser('history')
    # type=int so a depth given on the command line can be used as a slice
    # bound; without it the value arrives as a string.
    history_parser.add_argument('depth', help='Number of commands to list', nargs='?', default=10, type=int)
    history_parser.add_argument('--verbose', help="Flag for whether to output additional information",
                                action='store_true')

    annotator_parser = subparsers.add_parser('annotator')
    anchor_parser = subparsers.add_parser('anchor')

    thirdparty_parser = subparsers.add_parser('thirdparty')
    thirdparty_parser.add_argument("command", help="One of 'download', 'validate', or 'kaldi'")
    thirdparty_parser.add_argument('local_directory',
                                   help="Full path to the built executables to collect", nargs="?",
                                   default='')

    return parser


parser = create_parser()


def main():
    """Command line entry point: parse arguments and run the chosen subcommand.

    The thirdparty binary directory is put on the search path for the
    duration of the command and removed again afterwards. ``MFAError`` is
    printed and turned into exit status 1 unless ``--debug`` is set, in which
    case it is re-raised.
    """
    parser = create_parser()
    mp.freeze_support()
    args, unknown = parser.parse_known_args()
    for short in ['-c', '-d']:
        if short in unknown:
            print(f'Due to the number of options that `{short}` could refer to, it is not accepted. '
                  'Please specify the full argument')
            sys.exit(1)
    try:
        fix_path()
        # Check external dependencies before dispatching.
        if args.subcommand in ['align', 'train', 'train_ivector']:
            from montreal_forced_aligner.thirdparty.kaldi import validate_alignment_binaries
            if not validate_alignment_binaries():
                print("There was an issue validating Kaldi binaries, please ensure you've downloaded them via the "
                      "'mfa thirdparty download' command.  See 'mfa thirdparty validate' for more detailed information "
                      "on why this check failed.")
                sys.exit(1)
        elif args.subcommand in ['transcribe']:
            from montreal_forced_aligner.thirdparty.kaldi import validate_transcribe_binaries
            if not validate_transcribe_binaries():
                print("There was an issue validating Kaldi binaries, please ensure you've downloaded them via the "
                      "'mfa thirdparty download' command.  See 'mfa thirdparty validate' for more detailed information "
                      "on why this check failed.  If you are on MacOS, please note that the thirdparty binaries available "
                      "via the download command do not contain the transcription ones.  To get this functionality working "
                      "for the time being, please build kaldi locally and follow the instructions for running the "
                      "'mfa thirdparty kaldi' command.")
                sys.exit(1)
        elif args.subcommand in ['train_dictionary']:
            from montreal_forced_aligner.thirdparty.kaldi import validate_train_dictionary_binaries
            if not validate_train_dictionary_binaries():
                print("There was an issue validating Kaldi binaries, please ensure you've downloaded them via the "
                      "'mfa thirdparty download' command.  See 'mfa thirdparty validate' for more detailed information "
                      "on why this check failed.  If you are on MacOS, please note that the thirdparty binaries available "
                      "via the download command do not contain the train_dictionary ones.  To get this functionality working "
                      "for the time being, please build kaldi locally and follow the instructions for running the "
                      "'mfa thirdparty kaldi' command.")
                sys.exit(1)
        elif args.subcommand in ['g2p', 'train_g2p']:
            try:
                import pynini
            except ImportError:
                print("There was an issue importing Pynini, please ensure that it is installed. If you are on Windows, "
                      "please use the Windows Subsystem for Linux to use g2p functionality.")
                sys.exit(1)

        if args.subcommand == 'align':
            run_align_corpus(args, unknown, acoustic_languages)
        elif args.subcommand == 'adapt':
            run_adapt_model(args, unknown, acoustic_languages)
        elif args.subcommand == 'train':
            run_train_corpus(args, unknown)
        elif args.subcommand == 'g2p':
            run_g2p(args, unknown, g2p_languages)
        elif args.subcommand == 'train_g2p':
            run_train_g2p(args, unknown)
        elif args.subcommand == 'validate':
            run_validate_corpus(args, unknown)
        elif args.subcommand == 'download':
            run_download(args)
        elif args.subcommand == 'train_lm':
            run_train_lm(args, unknown)
        elif args.subcommand == 'train_dictionary':
            run_train_dictionary(args, unknown)
        elif args.subcommand == 'train_ivector':
            run_train_ivector_extractor(args, unknown)
        elif args.subcommand == 'classify_speakers':
            run_classify_speakers(args, unknown)
        elif args.subcommand in ['annotator', 'anchor']:
            from montreal_forced_aligner.command_line.anchor import run_anchor
            run_anchor(args)
        elif args.subcommand == 'thirdparty':
            run_thirdparty(args)
        elif args.subcommand == 'transcribe':
            run_transcribe_corpus(args, unknown)
        elif args.subcommand == 'create_segments':
            run_create_segments(args, unknown)
        elif args.subcommand == 'configure':
            update_global_config(args)
            global GLOBAL_CONFIG
            GLOBAL_CONFIG = load_global_config()
        elif args.subcommand == 'history':
            depth = args.depth
            history = load_command_history()[-depth:]
            # Each entry is printed exactly once; the header is printed once.
            if args.verbose:
                print('command\tDate\tExecution time\tVersion\tExit code\tException')
                for h in history:
                    execution_time = time.strftime('%H:%M:%S', time.gmtime(h['execution_time']))
                    d = h['date'].isoformat()
                    print(
                        f"{h['command']}\t{d}\t{execution_time}\t{h['version']}\t{h['exit_code']}\t{h['exception']}")
            else:
                for h in history:
                    print(h['command'])
        elif args.subcommand == 'version':
            print(__version__)
    except MFAError as e:
        if getattr(args, 'debug', False):
            raise
        print(e)
        sys.exit(1)
    finally:
        unfix_path()


if __name__ == '__main__':
    main()