import re

# Rewrite rules applied by ``expand_contractions``: regex pattern -> replacement
# template.  Templates may reference the optional plural group as ``\g<1>``.
#
# NOTE(review): in the copy of this file that was reviewed, the first 32
# entries of this table were truncated to a bare ``r"(?"`` (they appear to
# have started with a look-around construct) and could not be recovered.
# Only the intact entries are listed below -- restore the missing rules from
# the upstream source before relying on this table being complete.
contractions = {
    r'^em a(s)?$': r'na\g<1>',
    r'^de a(s)?$': r'da\g<1>',
    r'^de o(s)?$': r'do\g<1>',
    r'^a o(s)?$': r'ao\g<1>',
    r'^a a(s)?$': r'à\g<1>',
    r'^por a(s)?$': r'pela\g<1>',
    r'^por o(s)?$': r'pelo\g<1>',
    r'^em esta(s)?$': r'nesta\g<1>',
    r'^em este(s)?$': r'neste\g<1>',
    r'^em essa(s)?$': r'nessa\g<1>',
    r'^em esse(s)?$': r'nesse\g<1>',
    r'^em um$': r'num',
    r'^em uns$': r'nuns',
    r'^em uma(s)?$': r'numa\g<1>',
    r'^em isso$': r'nisso',
    r'^em aquele(s)?$': r'naquele\g<1>',
    r'^em aquela(s)?$': r'naquela\g<1>',
    r'^em aquilo$': r'naquilo',
    r'^de uma(s)?$': r'duma\g<1>',
    r'^de aqui$': r'daqui',
    r'^de ali$': r'dali',
    r'^de aquele(s)?$': r'daquele\g<1>',
    r'^de aquela(s)?$': r'daquela\g<1>',
    r'^de este(s)?$': r'deste\g<1>',
    r'^de esta(s)?$': r'desta\g<1>',
    r'^de esse(s)?$': r'desse\g<1>',
    r'^de essa(s)?$': r'dessa\g<1>',
    r'^de aí$': r'daí',
    r'^de um$': r'dum',
    r'^de onde$': r'donde',
    r'^de isto$': r'disto',
    r'^de isso$': r'disso',
    r'^de aquilo$': r'daquilo',
    r'^de ela(s)?$': r"dela\g<1>",
    r'^de ele(s)?$': r"dele\g<1>",
    r'^em isto$': r'nisto',
    r'^em ele(s)?$': r'nele\g<1>',
    r'^em ela(s)?$': r'nela\g<1>',
    r'^em outro(s)?$': r'noutro\g<1>',
    r'^a onde$': r'aonde',
    r'^a aquela(s)?$': r'àquela\g<1>',
    r'^a aquele(s)?$': r'àquele\g<1>',
    r'^a aquilo$': r'àquilo',
    r'^com ti$': r'contigo',
    r'^não é$': r'né',
    r'^com mim$': r'comigo',
    r'^com nós$': r'conosco',
    r'^com si$': r'consigo',
    r'^para a$': r'pra',
    r'^para o$': r'pro',
}


def replace_keep_case(word: str, replacement: str, text: str) -> str:
    """
    Replace regex matches in a text while mirroring the case of each match.

    Matching is case-insensitive. The casing of the matched text decides the
    casing of what is inserted: all-lowercase -> lowercase, title case ->
    capitalized, all-uppercase -> uppercase, otherwise only a leading
    uppercase letter is carried over.

    Parameters
    ----------
    word: str
        Regular expression to search for.
    replacement: str
        Replacement template; may contain group references such as
        ``\\g<1>``, which are expanded against each match.
    text: str
        Text to be processed.

    Returns
    -------
    str:
        Processed string.
    """
    def match_case(match):
        matched = match.group()
        expanded = match.expand(replacement)
        # A zero-width match has no casing to mirror (and no first character
        # to inspect), so insert the replacement unchanged.
        if not matched:
            return expanded
        if matched.islower():
            return expanded.lower()
        if matched.istitle():
            return expanded.capitalize()
        if matched.isupper():
            return expanded.upper()
        if matched[0].isupper():
            # Mixed case such as "Em a": only the initial capital is kept.
            return expanded[0].upper() + expanded[1:]
        return expanded

    return re.sub(word, match_case, text, flags=re.IGNORECASE)


def expand_contractions(text: str) -> str:
    """
    Apply every rewrite rule in ``contractions`` to a text, preserving case.

    Rules are applied one after another in the table's insertion order, each
    one operating on the output of the previous one. The rules listed in the
    table are anchored with ``^``/``$`` and therefore only fire when the
    whole text is exactly the two-word sequence (e.g. ``"em a"`` becomes
    ``"na"``); text merely containing such a sequence is left untouched.

    Parameters
    ----------
    text: str
        Text to be processed.

    Returns
    -------
    str:
        Text after all rules have been applied.
    """
    for pattern, template in contractions.items():
        text = replace_keep_case(pattern, template, text)
    return text