Podcastify / melo /text /english_utils /abbreviations.py
mrfakename's picture
Init
4300fed
raw
history blame
948 Bytes
import re
# Ordered list of (compiled pattern, spoken form) pairs for English
# abbreviations. Each pattern matches the abbreviation starting at a word
# boundary and immediately followed by a literal period, ignoring case
# (so "Dr.", "dr." and "DR." all match). The trailing period is consumed by
# the match and therefore dropped from the expanded text.
abbreviations_en = [
    (re.compile(r"\b%s\." % abbreviation, re.IGNORECASE), spoken_form)
    for abbreviation, spoken_form in [
        ("mrs", "missus"),  # previously misspelled as "misess"
        ("mr", "mister"),
        ("dr", "doctor"),
        ("st", "saint"),
        ("co", "company"),
        ("jr", "junior"),
        ("maj", "major"),
        ("gen", "general"),
        ("drs", "doctors"),
        ("rev", "reverend"),
        ("lt", "lieutenant"),
        ("hon", "honorable"),
        ("sgt", "sergeant"),
        ("capt", "captain"),
        ("esq", "esquire"),
        ("ltd", "limited"),
        ("col", "colonel"),
        ("ft", "fort"),
    ]
]
def expand_abbreviations(text: str, lang: str = "en") -> str:
    """Replace known abbreviations in ``text`` with their spoken forms.

    Every (pattern, replacement) pair in ``abbreviations_en`` is applied in
    list order, each over the result of the previous substitution.

    Args:
        text: The text to expand.
        lang: Language code selecting the abbreviation table. Only ``"en"``
            is supported.

    Returns:
        ``text`` with every pattern match replaced by its expansion.

    Raises:
        NotImplementedError: If ``lang`` is anything other than ``"en"``.
    """
    if lang != "en":
        # Name the rejected language so the failure is diagnosable.
        raise NotImplementedError(
            "abbreviation expansion is not implemented for lang=%r" % (lang,)
        )
    for pattern, expansion in abbreviations_en:
        text = pattern.sub(expansion, text)
    return text