WR / text /workspace /convert_jyutping_waitau.py
Naozumi0512's picture
init
e62fb95
raw
history blame contribute delete
No virus
1.33 kB
import re
from functools import reduce
# Substring -> replacement rules used by rom_map.  Several keys are prefixes
# of longer keys ("aa" / "aai", "yu" / "yun") and some replacements are
# themselves keys ("aai" -> "ai", "ai" -> "äi"), so the rules must never be
# applied one after another on the same text.
ROM_MAPPING = {
    "j": "y",
    "aa": "a",
    "oe": "ö",
    "yu": "ü",
    "aai": "ai",
    "ai": "äi",
    "eoi": "öi",
    "aau": "au",
    "au": "äu",
    "aam": "am",
    "am": "äm",
    "aan": "an",
    "an": "än",
    "yun": "ün",
    "aang": "ang",
    "aeng": "æng",
    "aeing": "æing",
    "ang": "äng",
    "aing": "äing",
    "oeng": "öng",
    "yung": "üng",
    "aap": "ap",
    "ap": "äp",
    "aak": "ak",
    "aek": "æk",
    "ak": "äk",
    "oek": "ök",
    "yuk": "ük",
    "aat": "at",
    "at": "ät",
    "eot": "öt",
    "yut": "üt",
}

# One alternation over every key, longest first, so that at any position the
# most specific rule wins ("aang" before "aan" before "aa").
_ROM_PATTERN = re.compile(
    "|".join(re.escape(key) for key in sorted(ROM_MAPPING, key=len, reverse=True))
)

# "gu" / "ku" gain a "w" unless the "u" is followed by "ng" or "k".
_GU_KU_PATTERN = re.compile(r"(g|k)u(?!ng|k)")


def rom_map(jyutping: str) -> str:
    """Rewrite *jyutping* using ROM_MAPPING, then insert "w" into "gu"/"ku".

    The table is applied in a single left-to-right pass with longest-match
    priority, so every stretch of the input is rewritten at most once.
    Chaining ``str.replace`` per rule (the previous approach) let earlier
    rules feed later ones: "aai" became "ai" and was then rewritten again to
    "äi", making it indistinguishable from an original "ai"; likewise "juk"
    became "yuk" and then "ük".

    After the table pass, "gu" and "ku" become "gwu" and "kwu" except when
    the "u" is followed by "ng" or "k".

    Args:
        jyutping: Text to convert; characters not covered by any rule are
            passed through unchanged.

    Returns:
        The converted text.
    """
    mapped = _ROM_PATTERN.sub(lambda match: ROM_MAPPING[match.group(0)], jyutping)
    return _GU_KU_PATTERN.sub(r"\1wu", mapped)
# Symbols to convert, in their original order.
# NOTE(review): despite the name, the list contains vowel-initial symbols
# ("aa", "ong", "uk", ...) alongside consonants -- confirm the intended scope.
INITIALS = [
    "aa", "aai", "aak", "aap", "aat", "aau",
    "ai", "au", "ap", "at", "ak", "a",
    "p", "b", "e",
    "ts", "t", "dz", "d",
    "kw", "k", "gw", "g",
    "f", "h", "l", "m", "ng", "n",
    "s", "y", "w", "c", "z", "j",
    "ong", "on", "ou", "oi", "ok", "o",
    "uk", "ung",
]

# Every symbol passed through rom_map, position for position with INITIALS.
translated_INITIALS = list(map(rom_map, INITIALS))

# Show the converted list on stdout when the module runs.
print(translated_INITIALS)