from typing import Collection

from jaconv import jaconv
import tacotron_cleaner.cleaners
from typeguard import check_argument_types

try:
    from vietnamese_cleaner import vietnamese_cleaners
except ImportError:
    # Optional dependency: only needed when the "vietnamese" cleaner is used.
    vietnamese_cleaners = None


class TextCleaner:
    """Apply a configurable sequence of text cleaners to a string.

    Recognised cleaner names:

    * ``"tacotron"`` -- ``tacotron_cleaner.cleaners.custom_english_cleaners``
    * ``"jaconv"`` -- ``jaconv.normalize``
    * ``"vietnamese"`` -- ``vietnamese_cleaners.vietnamese_cleaner``
      (requires the optional ``vietnamese_cleaner`` import to have succeeded)

    Cleaner names are not validated at construction time; an unknown name
    raises ``RuntimeError`` when the instance is called.

    Examples:
        >>> cleaner = TextCleaner("tacotron")
        >>> cleaner("(Hello-World); & jr. & dr.")
        'HELLO WORLD, AND JUNIOR AND DOCTOR'

    """

    def __init__(self, cleaner_types: Collection[str] = None):
        """Store the cleaner names to apply.

        Args:
            cleaner_types: ``None`` (no cleaning), a single cleaner name,
                or a collection of cleaner names applied in iteration order.
        """
        assert check_argument_types()

        if isinstance(cleaner_types, str):
            # A bare string is one cleaner name, not a sequence of characters.
            self.cleaner_types = [cleaner_types]
            return

        self.cleaner_types = [] if cleaner_types is None else list(cleaner_types)

    def __call__(self, text: str) -> str:
        """Run every configured cleaner over ``text`` in order.

        Args:
            text: The input string.

        Returns:
            The string after all configured cleaners have been applied;
            ``text`` itself when no cleaners are configured.

        Raises:
            RuntimeError: If a configured cleaner name is not supported, or
                if ``"vietnamese"`` is requested but its module could not be
                imported.
        """
        cleaned = text
        for kind in self.cleaner_types:
            cleaned = self._apply(kind, cleaned)
        return cleaned

    def _apply(self, kind: str, text: str) -> str:
        """Apply the single cleaner named ``kind`` to ``text``."""
        if kind == "tacotron":
            return tacotron_cleaner.cleaners.custom_english_cleaners(text)

        if kind == "jaconv":
            return jaconv.normalize(text)

        if kind == "vietnamese":
            if vietnamese_cleaners is None:
                raise RuntimeError("Please install underthesea")
            return vietnamese_cleaners.vietnamese_cleaner(text)

        raise RuntimeError(f"Not supported: type={kind}")