# Copied from https://github.com/fahadh4ilyas/syllable_splitter
# MIT License
import re


class SyllableSplitter:
    """Rule-based syllable splitter driven by consonant/vowel patterns.

    A word is first tokenised into "letters" (single characters or
    multi-character consonant units such as ``ng`` or ``sy``), each tagged
    as ``"c"`` (consonant), ``"v"`` (vowel) or ``"s"`` (anything else, e.g.
    spaces, digits, punctuation).  Syllable breaks are then placed by
    matching fixed patterns on the resulting tag string.

    Classification is an exact membership test against the lowercase sets
    below, so uppercase characters are tagged ``"s"``.
    """

    # Break rules, applied in this order to the tag string.  Each rule
    # consumes the tags that stay on the left of the break and uses a
    # lookahead for the tags that must follow, so overlapping occurrences
    # are all handled in a single left-to-right pass.  "|" markers added by
    # an earlier rule never match any class here, which keeps later rules
    # from breaking across an existing boundary.
    _BREAK_RULES = (
        re.compile(r"vc(?=c)"),     # v c | c...  consonant cluster after a vowel
        re.compile(r"v(?=v)"),      # v | v       adjacent vowels
        re.compile(r"v(?=cv)"),     # v | c v     single consonant opens next syllable
        re.compile(r"[cvs](?=s)"),  # x | s       isolate non-letters on the left
        re.compile(r"s(?=[cvs])"),  # s | x       isolate non-letters on the right
    )

    def __init__(self):
        # Units tagged "c": single consonants plus two-character digraphs
        # that are kept together as one letter.
        self.consonant = {
            "b", "c", "d", "f", "g", "h", "j", "k", "l", "m", "n", "p", "q",
            "r", "s", "t", "v", "w", "x", "y", "z",
            "ng", "ny", "sy", "ch", "dh", "gh", "kh", "ph", "sh", "th",
        }
        # Two-consonant clusters kept as one unit unless a vowel follows.
        self.double_consonant = {"ll", "ks", "rs", "rt", "nk", "nd"}
        # Units tagged "v".
        self.vocal = {"a", "e", "ê", "é", "è", "i", "o", "u"}

    def split_letters(self, string):
        """Tokenise ``string`` into letter units and their tag string.

        Args:
            string: Text to tokenise.

        Returns:
            A ``(letters, arrange)`` tuple.  ``letters`` is a list of
            one- or two-character strings whose concatenation equals
            ``string``; ``arrange`` is a string of the same length as
            ``letters`` made of ``"c"``, ``"v"`` and ``"s"`` tags.
        """
        letters = []
        tags = []
        pos = 0
        length = len(string)

        while pos < length:
            # At the last character this slice is one character long, so the
            # consonant/vowel checks below also classify a trailing single.
            pair = string[pos:pos + 2]

            if pair in self.double_consonant:
                if pos + 2 < length and string[pos + 2] in self.vocal:
                    # Cluster followed by a vowel: emit only the first
                    # consonant; the second is re-examined on the next
                    # iteration and ends up starting the next syllable.
                    unit = pair[0]
                else:
                    unit = pair
                tag = "c"
            elif pair in self.consonant:
                unit, tag = pair, "c"
            elif pair in self.vocal:
                unit, tag = pair, "v"
            else:
                # No two-character unit matched: classify one character.
                unit = string[pos]
                if unit in self.consonant:
                    tag = "c"
                elif unit in self.vocal:
                    tag = "v"
                else:
                    tag = "s"

            letters.append(unit)
            tags.append(tag)
            pos += len(unit)

        return letters, "".join(tags)

    def split_syllables_from_letters(self, letters, arrange):
        """Group tokenised letters into syllables.

        Args:
            letters: Letter units, as returned by :meth:`split_letters`.
            arrange: Tag string with one ``"c"``/``"v"``/``"s"`` character
                per entry of ``letters``.

        Returns:
            A list of syllable strings.  Runs of ``"s"`` units are emitted
            one unit per entry.  Empty input yields ``[""]``.
        """
        # Place break markers in the tag string only.  The letters are never
        # joined with a marker, so a literal "|" in the input is preserved
        # as its own unit instead of being mistaken for a boundary.
        marked = arrange
        for rule in self._BREAK_RULES:
            marked = rule.sub(r"\g<0>|", marked)

        syllables = []
        current = []
        units = iter(letters)
        for tag in marked:
            if tag == "|":
                syllables.append("".join(current))
                current = []
            else:
                # Tolerate a letters list shorter than the tag string.
                current.append(next(units, ""))
        # Surplus letters (list longer than the tag string) stay attached to
        # the final syllable.
        current.extend(units)
        syllables.append("".join(current))
        return syllables

    def split_syllables(self, string):
        """Split ``string`` into a list of syllables.

        Convenience wrapper that tokenises with :meth:`split_letters` and
        groups with :meth:`split_syllables_from_letters`.
        """
        letters, arrange = self.split_letters(string)
        return self.split_syllables_from_letters(letters, arrange)