# Copyright (c) 2024 NVIDIA CORPORATION. # Licensed under the MIT license. import os import string import yaml from copy import deepcopy import torch from transformers import AutoTokenizer, set_seed set_seed(0) from data import AudioTextDataProcessor from src.factory import create_model_and_transforms def prepare_tokenizer(model_config): tokenizer_path = model_config['tokenizer_path'] cache_dir = model_config['cache_dir'] text_tokenizer = AutoTokenizer.from_pretrained( tokenizer_path, local_files_only=False, trust_remote_code=True, cache_dir=cache_dir, ) text_tokenizer.add_special_tokens( {"additional_special_tokens": ["