|
|
|
"""Recipe for training a BPE tokenizer for Fluent Speech Commands. |
|
The tokenizer converts semantics into sub-word units that can |
|
be used to train a language (LM) or an acoustic model (AM). |
|
|
|
To run this recipe, do the following: |
|
> python train.py hparams/tokenizer_bpe51.yaml |
|
|
|
|
|
Authors |
|
* Abdel Heba 2021 |
|
* Mirco Ravanelli 2021 |
|
* Loren Lugosch 2021 |
|
""" |
|
|
|
import sys |
|
import speechbrain as sb |
|
from hyperpyyaml import load_hyperpyyaml |
|
from speechbrain.utils.distributed import run_on_main |
|
|
|
if __name__ == "__main__":
    # Entry point of the recipe: load the hyperparameters, set up the
    # experiment, prepare the data and finally invoke the tokenizer object
    # defined in the YAML file.

    # Split the command line into the hyperparameter YAML path, the runtime
    # options and the overrides supplied on the command line.
    hparams_file, run_opts, overrides = sb.parse_arguments(sys.argv[1:])

    # Read the YAML with an explicit encoding so that parsing does not depend
    # on the platform's locale default (which is not UTF-8 everywhere).
    with open(hparams_file, encoding="utf-8") as fin:
        hparams = load_hyperpyyaml(fin, overrides)

    # Set up the distributed process group from the run options.
    # NOTE(review): presumably a no-op for single-process runs -- confirm
    # against the SpeechBrain documentation.
    sb.utils.distributed.ddp_init_group(run_opts)

    # Create the output folder and store the hyperparameter file together
    # with the overrides that were used for this run.
    sb.create_experiment_directory(
        experiment_directory=hparams["output_folder"],
        hyperparams_to_save=hparams_file,
        overrides=overrides,
    )

    # Kept as a local import at this exact position, as in the original
    # script (the recipe-local `prepare` module sits next to this file).
    from prepare import prepare_FSC

    # Data preparation is dispatched through run_on_main.
    # NOTE(review): run_on_main is expected to execute the callable on the
    # main process only in multi-process runs -- confirm.
    run_on_main(
        prepare_FSC,
        kwargs={
            "data_folder": hparams["data_folder"],
            "save_folder": hparams["output_folder"],
            "skip_prep": hparams["skip_prep"],
        },
    )

    # Call the object stored under the "tokenizer" key of the YAML; per the
    # module docstring this is what trains the BPE tokenizer.
    hparams["tokenizer"]()
|
|