Spaces:
Running
Running
# credits: gleki | |
from __future__ import annotations | |
import sys | |
import os | |
from re import sub, compile | |
from itertools import islice | |
def krulermorna(text: str) -> str:
    """Rewrite Lojban text so every glide and diphthong is one character.

    Dots are removed, "u"/"i" directly before a vowel become "w"/"ɩ", and the
    pairs "au", "ai", "ei", "oi" become "ḁ", "ą", "ę", "ǫ".

    Args:
        text: Lojban text in its usual spelling.

    Returns:
        The rewritten text.
    """
    # Applied strictly top to bottom. The second rule puts a dot at the very
    # start of the text and the last rule removes every dot again.
    rewrite_rules = (
        (r"\.", ""),
        (r"^", "."),
        (r"u([aeiouy])", r"w\1"),
        (r"i([aeiouy])", r"ɩ\1"),
        (r"au", "ḁ"),
        (r"ai", "ą"),
        (r"ei", "ę"),
        (r"oi", "ǫ"),
        (r"\.", ""),
    )
    for pattern, replacement in rewrite_rules:
        text = sub(pattern, replacement, text)
    return text
def krulermornaize(words: list[str]) -> list[str]:
    """Run krulermorna over each word and return the results as a new list, in order."""
    return list(map(krulermorna, words))
# Transcription table used by lojban2ipa_vits.
# Every key is a regular-expression fragment; the converter anchors it with
# "^" and tries keys from longest to shortest, keeping the order written here
# among keys of equal length. Do not reorder entries casually.
ipa_vits = {
    # --- plain vowels ---
    "a$": "aː",
    "a": "aː",
    # disabled: "e(?=v)": 'ɛːʔ'
    # disabled: "e$": 'ɛːʔ'
    "e": "ɛː",
    "i": "iː",
    "o": "oː",
    "u": "ʊu",
    # disabled: "u": 'ʊː'
    "y": "əː",
    # --- single-character diphthongs produced by krulermorna ---
    "ą": "aɪ",
    "ę": "ɛɪ",
    # disabled: "ę(?=\b)(?!')": 'ɛɪʔ'
    "ǫ": "ɔɪ",
    "ḁ": "aʊ",
    # --- "ɩ" glide, with and without a following vowel ---
    "ɩa": "jaː",
    "ɩe": "jɛː",
    "ɩi": "jiː",
    "ɩo": "jɔː",
    "ɩu": "juː",
    "ɩy": "jəː",
    "ɩ": "j",
    # --- "w" glide, with and without a following vowel ---
    "wa": "waː",
    "we": "wɛː",
    "wi": "wiː",
    "wo": "wɔː",
    "wu": "wuː",
    "wy": "wəː",
    "w": "w",
    # --- fricatives and the apostrophe ---
    "c": "ʃ",
    # disabled: "bj": 'bʒ'
    "j": "ʒ",
    "s": "s",
    "z": "z",
    "f": "f",
    "v": "v",
    "x": "hhh",
    "'": "h",
    # disabled: "dj":'dʒ'
    # disabled: "tc":'tʃ'
    # disabled: "dz":'ʣ'
    # disabled: "ts":'ʦ'
    # --- "r": the lookahead key applies when no stress mark or vowel follows ---
    "r": "ɹ",
    "r(?![ˈaeiouyḁąęǫ])": "ɹɹ",
    # disabled: 'r(?=[ˈaeiouyḁąęǫ])': 'ɹ'
    # --- nasals, including the "nu"/"ng" special cases ---
    "nˈu": "nˈʊuː",
    "nu": "nʊuː",
    "ng": "n.g",
    "n": "n",
    "m": "m",
    # --- remaining consonants ---
    "l": "l",
    "b": "b",
    "d": "d",
    "g": "ɡ",
    "k": "k",
    "p": "p",
    "t": "t",
    "h": "h",
}
# Transcription table used by lojban2ipa_nix.
# Every key is a regular-expression fragment; the converter anchors it with
# "^" and tries keys from longest to shortest, keeping the order written here
# among keys of equal length. Do not reorder entries casually.
ipa_nix = {
    # --- plain vowels ---
    "a$": "aː",
    "a": "aː",
    # disabled: "e(?=v)": 'ɛːʔ'
    # disabled: "e$": 'ɛːʔ'
    "e": "ɛː",
    "i": "iː",
    "o": "oː",
    "u": "ʊu",
    # disabled: "u": 'ʊː'
    "y": "əː",
    # --- single-character diphthongs produced by krulermorna ---
    "ą": "aɪ",
    "ę": "ɛɪ",
    # disabled: "ę(?=\b)(?!')": 'ɛɪʔ'
    "ǫ": "ɔɪ",
    "ḁ": "aʊ",
    # --- "ɩ" glide, with and without a following vowel ---
    "ɩa": "jaː",
    "ɩe": "jɛː",
    "ɩi": "jiː",
    "ɩo": "jɔː",
    "ɩu": "juː",
    "ɩy": "jəː",
    "ɩ": "j",
    # --- "w" glide, with and without a following vowel ---
    "wa": "waː",
    "we": "wɛː",
    "wi": "wiː",
    "wo": "wɔː",
    "wu": "wuː",
    "wy": "wəː",
    "w": "w",
    # --- fricatives, the "gj"/"bj" clusters and the apostrophe ---
    "c": "ʃ",
    "gj": "gɪʒ",
    "bj": "bɪʒ",
    "j": "ʒ",
    "s": "s",
    "z": "z",
    "f": "f",
    "v": "v",
    "x": "hh",
    "'": "h",
    # disabled: "dj":'dʒ'
    # disabled: "tc":'tʃ'
    # disabled: "dz":'ʣ'
    # disabled: "ts":'ʦ'
    # --- "r": the lookahead key applies when no stress mark or vowel follows ---
    "r": "ɹ",
    "r(?![ˈaeiouyḁąęǫ])": "ɹɹɹɪ",
    # disabled: 'r(?=[ˈaeiouyḁąęǫ])': 'ɹ'
    # --- nasals, including the "nu"/"ng" special cases ---
    "nˈu": "nˈʊuː",
    "nu": "nʊuː",
    "ng": "ng",
    "n": "n",
    "m": "m",
    # --- remaining consonants ---
    "l": "l",
    "b": "b",
    "d": "d",
    "g": "ɡ",
    "k": "k",
    "p": "p",
    "t": "t",
    "h": "h",
}
# One vowel, or one of the single-character diphthongs krulermorna produces.
vowel_pattern = compile("[aeiouyąęǫḁ]")

# Zero-width patterns: they match the position just before a vowel (or just
# before a diphthong character) without consuming it.
vowel_coming_pattern = compile("(?=[aeiouyąęǫḁ])")
diphthong_coming_pattern = compile("(?=[ąęǫḁ])")

# Word lists the converters compare against. They are stored in krulermorna
# form because the converters normalise their input the same way first.
question_words = krulermornaize(
    ["ma", "mo", "xu"]
)
starter_words = krulermornaize(
    ["le", "lo", "lei", "loi"]
)
terminator_words = krulermornaize(
    ["kei", "ku'o", "vau", "li'u"]
)
def lojban2ipa(text: str, mode: str = "vits") -> str:
    """Convert Lojban text to IPA with the transcription variant named by *mode*.

    Args:
        text: Lojban text to convert.
        mode: "nix" selects lojban2ipa_nix. "vits" and every other value
            select lojban2ipa_vits, which is why "vits" is the default.

    Returns:
        The IPA string produced by the selected converter.
    """
    if mode == "nix":
        return lojban2ipa_nix(text)
    # "vits" and unrecognised modes share the same converter.
    return lojban2ipa_vits(text)
def _ordered_rules(table: dict) -> list:
    """Turn a transcription table into (compiled pattern, ipa) pairs, longest key first."""
    # sorted() is stable even with reverse=True, so keys of equal length keep
    # the order they have in the table.
    ranked = sorted(table.items(), key=lambda item: len(item[0]), reverse=True)
    return [(compile("^" + key), ipa) for key, ipa in ranked]


def _stress_word(word: str) -> tuple:
    """Insert the stress mark "ˈ" into *word* where the rules below call for it.

    Returns:
        (word with or without the mark, True if a mark was inserted).
    """
    # Cut the word just before every vowel; only the last two pieces matter.
    pieces = vowel_coming_pattern.split(word)
    last_two = pieces[-2:]
    if len(last_two) != 2 or not last_two[0]:
        return word, False
    head = "".join(pieces[:-2])
    penult, final = last_two
    if vowel_pattern.match(penult[0]) and vowel_pattern.match(final[0]):
        # Both pieces start with a vowel, e.g. {klama}, {ni'o}: mark the
        # second-to-last one.
        return head + "ˈ" + penult + final, True
    if diphthong_coming_pattern.match(final[0]):
        # The last piece starts with a diphthong, e.g. {lau}, {coi}: mark it.
        return head + penult + "ˈ" + final, True
    return word, False


def _transcribe(word: str, rules: list) -> str:
    """Convert *word* to IPA, applying at each position the first rule that matches there."""
    pieces = []
    pos = 0
    while pos < len(word):
        # Rules are matched against the remaining slice so that "^" and "$"
        # refer to the slice, not to the whole word.
        rest = word[pos:]
        for pattern, ipa in rules:
            found = pattern.match(rest)
            if found:
                pieces.append(ipa)
                # Always move forward, even if a rule matched zero characters.
                pos += max(found.end(), 1)
                break
        else:
            # No rule applies (e.g. the stress mark): copy the character.
            pieces.append(word[pos])
            pos += 1
    return "".join(pieces)


def _lojban2ipa(text: str, table: dict, question_postfix: str) -> str:
    """Shared implementation behind lojban2ipa_vits and lojban2ipa_nix.

    Args:
        text: Lojban text to convert.
        table: Transcription table (regex fragment -> IPA string).
        question_postfix: String placed after each question word.

    Returns:
        The IPA string, or "" when nothing is left of *text* after stripping
        whitespace and normalising with krulermorna.
    """
    text = krulermorna(text.strip())
    if not text:
        # Blank input used to fail with IndexError on the final-vowel check.
        return ""

    # Order and compile the rules once per call, not once per character.
    rules = _ordered_rules(table)
    words = text.split(" ")
    chunks = []
    for index, word in enumerate(words):
        prefix, postfix = "", ""
        if word in question_words:
            postfix = question_postfix
            prefix = " " + prefix
        if word in starter_words:
            prefix = " " + prefix
        if word in terminator_words:
            postfix = ", "
        # Comma (pause) before the first word and before {ni'o} / {i}.
        if index == 0 or word in ("ni'o", "i"):
            prefix = ", " + prefix

        marked_word, stressed = _stress_word(word)
        if stressed:
            # Pause after a word that received a stress mark.
            postfix += " "

        # Separate the word from the previous one unless that one is a
        # starter word such as {le} / {lo}.
        if index == 0 or words[index - 1] not in starter_words:
            prefix = " " + prefix

        chunks.append(prefix + _transcribe(marked_word, rules) + postfix)

    output = "".join(chunks).strip()
    output = sub(r" {2,}", " ", output)      # collapse runs of spaces
    output = sub(r", ?(?=,)", "", output)    # collapse consecutive commas
    if vowel_pattern.match(text[-1]):
        # Texts ending in a vowel are closed with a full stop.
        output += "."
    return output


def lojban2ipa_vits(text: str) -> str:
    """Convert Lojban *text* to IPA using the ipa_vits table.

    Question words are followed by "?". Returns "" for blank input.
    """
    return _lojban2ipa(text, ipa_vits, "?")


def lojban2ipa_nix(text: str) -> str:
    """Convert Lojban *text* to IPA using the ipa_nix table.

    Question words get no extra postfix. Returns "" for blank input.
    """
    return _lojban2ipa(text, ipa_nix, "")
# Example: print(lojban2ipa("ni'o le pa tirxu be me'e zo .teris. pu ki kansa le za'u pendo be le nei le ka xabju le foldi be loi spati", "vits"))