Spaces:

ixxan
/

uyghur-speech-models

Running

uyghur-speech-models / turkicTTS_ipa_convert.py

Irpan

asr

4f70bd6 15 days ago

55.9 kB

	#!/usr/bin/env python3
	# -*- coding: utf-8 -*-


	'''
	2022.06.03
	kazakh_to_ipa() <> ipa_to_kazakh()
	test_kazakh()
	turkish_to_ipa() <> ipa_to_turkish()
	test_turkish()

	2022.07.05
	kyrgyz_to_ipa() <> ipa_to_kyrgyz()
	test_kyrgyz()
	uzbek_to_ipa() <> ipa_to_uzbek()
	test_uzbek()
	azerbaijani_to_ipa() <> ipa_to_azerbaijani()
	test_azerbaijani()
	turkmen_to_ipa() <> ipa_to_turkmen()
	test_turkmen()

	2022.07.07
	tatar_to_ipa() <> ipa_to_tatar()
	test_tatar()
	bashkir_to_ipa() <> ipa_to_bashkir()
	test_bashkir()
	sakha_to_ipa() <> ipa_to_sakha()
	test_sakha()

	2022.07.12
	experimentally added î and â to turkish_to_ipa()

	2022.08.04
	uyghur_to_ipa() <> ipa_to_uyghur()
	'''

	import re


	# kazakh scripts

	def kazakh_to_ipa(text):
	    """Convert Kazakh Cyrillic text into the file's IPA-like working alphabet.

	    Letters are substituted in a fixed order (multi-sound letters first,
	    then consonants, then vowels) and four contextual rules are applied
	    afterwards.  Upper- and lower-case letters map to the same symbol;
	    characters without a mapping are passed through unchanged.
	    """
	    # (pattern, symbol) pairs; order matters and mirrors the reverse function.
	    letter_table = (
	        # three-sound convenience vowel
	        ("[Юю]", "ǔ"),
	        # two-sound convenience consonants
	        ("[Цц]", "š"), ("[Чч]", "ʆ"),
	        # two-sound convenience vowels
	        ("[Яя]", "ǎ"), ("[Ее]", "ě"), ("[Ёё]", "ǒ"), ("[Ии]", "ǐ"), ("[Уу]", "u"),
	        # single-sound consonants
	        ("[Бб]", "b"), ("[Вв]", "v"), ("[Гг]", "g"), ("[Ғғ]", "ɣ"), ("[Дд]", "d"),
	        ("[Жж]", "ʒ"), ("[Зз]", "z"), ("[Йй]", "j"), ("[Кк]", "k"), ("[Ққ]", "q"),
	        ("[Лл]", "l"), ("[Мм]", "m"), ("[Нн]", "n"), ("[Ңң]", "ŋ"), ("[Пп]", "p"),
	        ("[Рр]", "r"), ("[Сс]", "s"), ("[Тт]", "t"), ("[Фф]", "f"), ("[Хх]", "x"),
	        ("[Һһ]", "h"), ("[Шш]", "ʃ"), ("[Щщ]", "ɕ"), ("[Ъъ]", "ʔ"), ("[Ьь]", "ʲ"),
	        # single-sound vowels
	        ("[Аа]", "ɑ"), ("[Әә]", "æ"), ("[Оо]", "ɔ"), ("[Өө]", "ɵ"), ("[Ұұ]", "ʊ"),
	        ("[Үү]", "ʏ"), ("[Ыы]", "ɤ"), ("[Іі]", "ɪ"), ("[Ээ]", "e"),
	    )
	    for pattern, symbol in letter_table:
	        text = re.sub(pattern, symbol, text)

	    consonants = "bvgɣdʒzjkqlłmnŋprstfxhʃɕʔšʆʲ"
	    context_rules = (
	        # rule 1: [l] after a front vowel becomes [ł] unless a front vowel
	        # or [ʲ] follows (e.g. [kěłdɪ] but [kělěmɪn], [marsělʲ]).
	        # NOTE: the pattern needs a following character, so an [l] that is
	        # the very last character of the text is left as [l].
	        (r"([æěɵʏɪ])(l)([^æěɵʏɪʲ])", r"\1ł\3"),
	        # rule 2: word-initial [ɔ]/[ɵ] are preceded by [w] (осы -> [wɔsɤ]).
	        (r"\b([ɔɵ])", r"w\1"),
	        # rule 3: [u] directly before a consonant is written [w].
	        ("u([" + consonants + "])", r"w\1"),
	        # rule 4: [ǐ] directly before a consonant is written [i].
	        ("ǐ([" + consonants + "])", r"i\1"),
	    )
	    for pattern, replacement in context_rules:
	        text = re.sub(pattern, replacement, text)

	    return text

	def ipa_to_kazakh(text):
	    """Convert the file's IPA-like working alphabet into Kazakh Cyrillic.

	    Symbols are mapped back to letters in the same order kazakh_to_ipa()
	    uses, then the contextual rules are undone.  Symbols that only other
	    languages' converters emit (Ǯ, ł, ḡ, ɲ) are spelled with Cyrillic
	    digraphs at the end.  Returns the converted string.
	    """
	    symbol_table = (
	        # three-sound convenience vowel
	        ("ǔ", "ю"),
	        # two-sound convenience consonants
	        ("š", "ц"), ("ʆ", "ч"),
	        # two-sound convenience vowels
	        ("ǎ", "я"), ("ě", "е"), ("ǒ", "ё"), ("ǐ", "и"), ("u", "у"),
	        # single-sound consonants
	        ("b", "б"), ("v", "в"), ("g", "г"), ("ɣ", "ғ"), ("d", "д"), ("ʒ", "ж"),
	        ("z", "з"), ("j", "й"), ("k", "к"), ("q", "қ"), ("l", "л"), ("m", "м"),
	        ("n", "н"), ("ŋ", "ң"), ("p", "п"), ("r", "р"), ("s", "с"), ("t", "т"),
	        ("f", "ф"), ("x", "х"), ("h", "һ"), ("ʃ", "ш"), ("ɕ", "щ"), ("ʔ", "ъ"),
	        ("ʲ", "ь"),
	        # single-sound vowels
	        ("ɑ", "а"), ("æ", "ә"), ("ɔ", "о"), ("ɵ", "ө"), ("ʊ", "ұ"), ("ʏ", "ү"),
	        ("ɤ", "ы"), ("ɪ", "і"), ("e", "э"),
	    )
	    for symbol, letter in symbol_table:
	        text = text.replace(symbol, letter)

	    # anti-rule 1: [ł] after a Kazakh front vowel goes back to л.  A
	    # lookahead is used instead of consuming the next character so that a
	    # text-final [ł] is restored too (it used to fall through to "ль").
	    text = re.sub(r"([әеөүі])ł(?![әеөүіь])", r"\1л", text)

	    # anti-rule 2: drop the [w] inserted before word-initial о/ө.
	    text = re.sub(r"\bw([оө])", r"\1", text)

	    # anti-rule 3: [w] before a consonant was originally у.
	    text = re.sub(r"w([бвгғджзйкқлмнңпрстфхһцчшщъьчц])", r"у\1", text)

	    # anti-rule 4: [i] is used in one case only, so it always becomes и.
	    text = text.replace("i", "и")

	    # Symbols coming from the other converters (Turkish/Kyrgyz Ǯ,
	    # Turkish ł, Azerbaijani ḡ, Sakha ɲ) are approximated with digraphs.
	    text = re.sub(r"w([Ǯ])", r"у\1", text)
	    for symbol, letters in (("Ǯ", "дж"), ("ł", "ль"), ("ḡ", "гь"), ("ɲ", "нь")):
	        text = text.replace(symbol, letters)

	    return text

	# testing kazakh scripts

	def test_kazakh(text):
	    """Round-trip *text* through kazakh_to_ipa()/ipa_to_kazakh() and print a report.

	    The lower-cased input words are compared with the words that come back
	    from the round trip; on a mismatch the words unique to each side are
	    printed.
	    """
	    expected_words = text.lower().split()
	    actual_words = ipa_to_kazakh(kazakh_to_ipa(text)).split()

	    # words that appear on one side only
	    only_in_input = [word for word in expected_words if word not in actual_words]
	    only_in_output = [word for word in actual_words if word not in expected_words]

	    if expected_words != actual_words:
	        print("input text and output text -- different")
	        print("input:", only_in_input)
	        print("output:", only_in_output)
	        return
	    print("input text and output text -- identical")

	# turkish scripts

	def turkish_to_ipa(text):
	    """Convert Turkish Latin text into the file's IPA-like working alphabet.

	    Letters are substituted in a fixed order and three contextual rules
	    are applied afterwards.  Both letter cases map to the same symbol;
	    characters without a mapping are passed through unchanged.
	    """
	    # (pattern, symbol) pairs, applied strictly in this order.
	    letter_table = (
	        # two-sound convenience consonants
	        ("[Cc]", "Ǯ"), ("[Çç]", "ʆ"),
	        # two-sound convenience vowels
	        ("[İi]", "ǐ"), ("[Uu]", "u"),
	        # single-sound consonants (J before Y: Y produces the symbol j)
	        ("[Jj]", "ʒ"), ("[Yy]", "j"), ("[Bb]", "b"), ("[Dd]", "d"), ("[Ff]", "f"),
	        ("[Gg]", "g"), ("[Ğğ]", "ɣ"), ("[Hh]", "h"), ("[Kk]", "k"), ("[Ll]", "l"),
	        ("[Mm]", "m"), ("[Nn]", "n"), ("[Pp]", "p"), ("[Rr]", "r"), ("[Ss]", "s"),
	        ("[Şş]", "ʃ"), ("[Tt]", "t"), ("[Vv]", "v"), ("[Zz]", "z"),
	        # single-sound vowels
	        ("[Aa]", "ɑ"), ("[Ee]", "e"), ("[Iı]", "ɤ"), ("[Oo]", "ɔ"), ("[Öö]", "ɵ"),
	        ("[Üü]", "ʏ"),
	        # experimentally added circumflex vowels
	        ("[Îî]", "ǐ"), ("[Ââ]", "ɑ"),
	    )
	    for pattern, symbol in letter_table:
	        text = re.sub(pattern, symbol, text)

	    consonants = "bvgɣdʒzklłmnprstfhʃʆǮ"
	    context_rules = (
	        # rule 1: [l] after a front vowel becomes [ł] unless another front
	        # vowel follows (e.g. [gełdi] but [gelecek]).
	        # NOTE: a following character is required, so an [l] that ends the
	        # whole text is left as [l].
	        (r"([eɵʏǐ])(l)([^eɵʏǐ])", r"\1ł\3"),
	        # rule 2: [u] directly before a consonant is written [w].
	        ("u([" + consonants + "])", r"w\1"),
	        # rule 3: [ǐ] directly before a consonant is written [i].
	        ("ǐ([" + consonants + "])", r"i\1"),
	    )
	    for pattern, replacement in context_rules:
	        text = re.sub(pattern, replacement, text)

	    return text

	def ipa_to_turkish(text):
	    """Convert the file's IPA-like working alphabet into Turkish Latin text.

	    Only symbols that differ from their Turkish spelling are replaced;
	    symbols that are already the right letter (b, d, e, ...) need no work.
	    Returns the converted string.
	    """
	    symbol_table = (
	        # two-sound convenience consonants
	        ("Ǯ", "c"), ("ʆ", "ç"),
	        # j -> y must run before ʒ -> j so the new j is not rewritten
	        ("j", "y"), ("ʒ", "j"),
	        ("ɣ", "ğ"), ("ʃ", "ş"),
	        # vowels
	        ("ɑ", "a"), ("ɤ", "ı"), ("ǐ", "i"), ("ɔ", "o"), ("ɵ", "ö"), ("ʏ", "ü"),
	    )
	    for symbol, letter in symbol_table:
	        text = text.replace(symbol, letter)

	    # anti-rule 1: [ł] after a front vowel goes back to l.  The lookahead
	    # (instead of a consumed character) also restores a text-final [ł],
	    # which previously leaked into the output unchanged.
	    text = re.sub(r"([eöüi])ł(?![eöüi])", r"\1l", text)

	    # anti-rule 2: [w] is used in one case only, so it always becomes u.
	    text = text.replace("w", "u")

	    # anti-rule 3 needs no code: the symbol [i] is already the letter i.
	    return text

	# testing turkish scripts

	def test_turkish(text):
	    """Round-trip *text* through turkish_to_ipa()/ipa_to_turkish() and print a report.

	    The lower-cased input words are compared with the words that come back
	    from the round trip; on a mismatch the words unique to each side are
	    printed.
	    """
	    # str.lower() uses the default Unicode mapping: "I" -> "i" and
	    # "İ" -> "i" + U+0307.  The converters map I to ı and İ to i, so
	    # the two capital letters are folded by hand first; otherwise every
	    # word containing them is reported as different.
	    folded = text.replace("\u0130", "i").replace("I", "\u0131").lower()
	    input_text = folded.split()
	    output_text = ipa_to_turkish(turkish_to_ipa(text)).split()

	    input_difference = [word for word in input_text if word not in output_text]
	    output_difference = [word for word in output_text if word not in input_text]

	    if input_text == output_text:
	        print("input text and output text -- identical")
	    else:
	        print("input text and output text -- different")
	        print("input:", input_difference)
	        print("output:", output_difference)

	# kyrgyz scripts

	def kyrgyz_to_ipa(text):
	    """Convert Kyrgyz Cyrillic text into the file's IPA-like working alphabet.

	    Letters are substituted in a fixed order, rules 1-4 (shared with
	    Kazakh) are applied, and finally the Kyrgyz-specific backing of k/g
	    next to back vowels (rules 5-6).  Both letter cases map to the same
	    symbol; unmapped characters are passed through unchanged.
	    """
	    # (pattern, symbol) pairs, applied strictly in this order.
	    letter_table = (
	        # three-sound convenience vowel
	        ("[Юю]", "ǔ"),
	        # two-sound convenience consonants
	        ("[Цц]", "š"), ("[Чч]", "ʆ"), ("[Жж]", "Ǯ"),
	        # two-sound convenience vowels
	        ("[Яя]", "ǎ"), ("[Ее]", "ě"), ("[Ёё]", "ǒ"), ("[Ии]", "ǐ"), ("[Уу]", "u"),
	        # single-sound consonants
	        ("[Бб]", "b"), ("[Вв]", "v"), ("[Гг]", "g"), ("[Дд]", "d"), ("[Зз]", "z"),
	        ("[Йй]", "j"), ("[Кк]", "k"), ("[Лл]", "l"), ("[Мм]", "m"), ("[Нн]", "n"),
	        ("[Ңң]", "ŋ"), ("[Пп]", "p"), ("[Рр]", "r"), ("[Сс]", "s"), ("[Тт]", "t"),
	        ("[Фф]", "f"), ("[Хх]", "x"), ("[Шш]", "ʃ"), ("[Щщ]", "ɕ"), ("[Ъъ]", "ʔ"),
	        ("[Ьь]", "ʲ"),
	        # single-sound vowels
	        ("[Аа]", "ɑ"), ("[Оо]", "ɔ"), ("[Өө]", "ɵ"), ("[Үү]", "ʏ"), ("[Ыы]", "ɤ"),
	        ("[Ээ]", "e"),
	    )
	    for pattern, symbol in letter_table:
	        text = re.sub(pattern, symbol, text)

	    consonants = "bvgɣdzjkqlłmnŋprstfxhʃɕʔšʆǮʲ"
	    context_rules = (
	        # rule 1: [l] after [ě], [ɵ], [ʏ] becomes [ł] unless one of those
	        # vowels or [ʲ] follows.  NOTE: a following character is required,
	        # so an [l] that ends the whole text is left as [l].
	        (r"([ɵʏě])(l)([^ɵʏěʲ])", r"\1ł\3"),
	        # rule 2: word-initial [ɔ]/[ɵ] are preceded by [w].
	        (r"\b([ɔɵ])", r"w\1"),
	        # rule 3: [u] directly before a consonant is written [w].
	        ("u([" + consonants + "])", r"w\1"),
	        # rule 4: [ǐ] directly before a consonant is written [i].
	        ("ǐ([" + consonants + "])", r"i\1"),
	        # rule 5: k next to a back vowel (ɑ, ɔ, u, ɤ) becomes q.  By now
	        # a "u" before a consonant is already [w], so a following back
	        # vowel must be matched as either u or w (e.g. кут -> [qwt]).
	        (r"([ɑɔwɤ])k", r"\1q"),
	        (r"k([ɑɔuwɤ])", r"q\1"),
	        # rule 6: same backing for g -> ɣ.
	        (r"([ɑɔwɤ])g", r"\1ɣ"),
	        (r"g([ɑɔuwɤ])", r"ɣ\1"),
	    )
	    for pattern, replacement in context_rules:
	        text = re.sub(pattern, replacement, text)

	    return text

	def ipa_to_kyrgyz(text):
	    """Convert the file's IPA-like working alphabet into Kyrgyz Cyrillic.

	    Symbols are mapped back to letters (both k/q become к and both g/ɣ
	    become г), then the contextual rules of kyrgyz_to_ipa() are undone.
	    Returns the converted string.
	    """
	    symbol_table = (
	        # three-sound convenience vowel
	        ("ǔ", "ю"),
	        # two-sound convenience consonants
	        ("š", "ц"), ("ʆ", "ч"), ("Ǯ", "ж"),
	        # two-sound convenience vowels
	        ("ǎ", "я"), ("ě", "е"), ("ǒ", "ё"), ("ǐ", "и"), ("u", "у"),
	        # single-sound consonants
	        ("b", "б"), ("v", "в"), ("g", "г"), ("ɣ", "г"), ("d", "д"), ("z", "з"),
	        ("j", "й"), ("k", "к"), ("l", "л"), ("m", "м"), ("n", "н"), ("ŋ", "ң"),
	        ("p", "п"), ("q", "к"), ("r", "р"), ("s", "с"), ("t", "т"), ("f", "ф"),
	        ("x", "х"), ("ʃ", "ш"), ("ɕ", "щ"), ("ʔ", "ъ"), ("ʲ", "ь"),
	        # single-sound vowels
	        ("ɑ", "а"), ("ɔ", "о"), ("ɵ", "ө"), ("ʏ", "ү"), ("ɤ", "ы"), ("e", "э"),
	    )
	    for symbol, letter in symbol_table:
	        text = text.replace(symbol, letter)

	    consonants = "бвгдзйклмнңпрстфхцчшщъьчцж"

	    # anti-rule 1: [ł] after ө/ү/е goes back to л.  The lookahead (rather
	    # than a consumed character) also restores a text-final [ł], which
	    # previously leaked into the output unchanged.
	    text = re.sub(r"([өүе])ł(?![өүеʲ])", r"\1л", text)

	    # anti-rule 2: drop the [w] inserted before word-initial о/ө.
	    text = re.sub(r"\bw([оө])", r"\1", text)

	    # anti-rule 3: [w] before a consonant was originally у.
	    text = re.sub("w([" + consonants + "])", r"у\1", text)

	    # anti-rule 4: [i] before a consonant was originally и.
	    text = re.sub("i([" + consonants + "])", r"и\1", text)

	    return text

	# testing kyrgyz scripts

	def test_kyrgyz(text):
	    """Round-trip *text* through kyrgyz_to_ipa()/ipa_to_kyrgyz() and print a report.

	    The lower-cased input words are compared with the words that come back
	    from the round trip; on a mismatch the words unique to each side are
	    printed.
	    """
	    expected_words = text.lower().split()
	    actual_words = ipa_to_kyrgyz(kyrgyz_to_ipa(text)).split()

	    # words that appear on one side only
	    only_in_input = [word for word in expected_words if word not in actual_words]
	    only_in_output = [word for word in actual_words if word not in expected_words]

	    if expected_words != actual_words:
	        print("input text and output text -- different")
	        print("input:", only_in_input)
	        print("output:", only_in_output)
	        return
	    print("input text and output text -- identical")

	# uzbek scripts

	def uzbek_to_ipa(text):
	    """Convert Uzbek Latin text into the file's IPA-like working alphabet.

	    Letters and digraphs are substituted in a fixed order and three
	    contextual rules are applied afterwards.  Both letter cases map to
	    the same symbol; unmapped characters are passed through unchanged.

	    The modifier apostrophe of "oʻ"/"gʻ" is accepted both as U+2018 and as
	    U+02BB; the original code accepted a different one for each letter,
	    so no single spelling of a word such as "o‘g‘il" converted fully.
	    """
	    # apostrophe variants that may follow o/g (literals plus escapes)
	    turned_comma = "[‘ʻ\u2018\u02bb]"
	    # (pattern, symbol) pairs, applied strictly in this order.
	    letter_table = (
	        # two-sound convenience consonants (any casing of the digraph)
	        ("[Jj]", "Ǯ"), ("[Cc][Hh]", "ʆ"),
	        # two-sound convenience vowels
	        ("[Ii]", "ǐ"), ("[Uu]", "u"),
	        # single-sound consonants
	        ("[Bb]", "b"), ("[Dd]", "d"), ("[Ff]", "f"),
	        ("[Gg]" + turned_comma, "ɣ"),  # must precede plain g
	        ("[Gg]", "g"), ("[Hh]", "h"), ("[Kk]", "k"), ("[Ll]", "l"), ("[Mm]", "m"),
	        ("[Nn]", "n"),
	        ("ng", "ŋ"),  # n and g are already lower-case at this point
	        ("[Pp]", "p"), ("[Qq]", "q"), ("[Rr]", "r"), ("[Ss]", "s"),
	        ("sh", "ʃ"),  # s and h are already lower-case at this point
	        ("[Tt]", "t"), ("[Vv]", "v"), ("[Xx]", "x"), ("[Yy]", "j"), ("[Zz]", "z"),
	        # single-sound vowels
	        ("[Aa]", "æ"), ("[Ee]", "e"),
	        ("[Oo]" + turned_comma, "ɵ"),  # must precede plain o
	        ("[Oo]", "ɔ"),
	        # hard sign
	        ("'", "ʔ"),
	    )
	    for pattern, symbol in letter_table:
	        text = re.sub(pattern, symbol, text)

	    consonants = "bvgɣdzjkqlłmnŋprstfxhʃʔʆǮ"
	    context_rules = (
	        # rule 1: [l] after [æ], [e], [ɵ], [ǐ] becomes [ł] unless one of
	        # those vowels follows.  NOTE: a following character is required,
	        # so an [l] that ends the whole text is left as [l].
	        (r"([æɵǐe])(l)([^æɵǐe])", r"\1ł\3"),
	        # rule 2: [u] directly before a consonant is written [w].
	        ("u([" + consonants + "])", r"w\1"),
	        # rule 3: [ǐ] directly before a consonant is written [i].
	        ("ǐ([" + consonants + "])", r"i\1"),
	    )
	    for pattern, replacement in context_rules:
	        text = re.sub(pattern, replacement, text)

	    return text

	def ipa_to_uzbek(text):
	    """Convert the file's IPA-like working alphabet into Uzbek Latin text.

	    Only symbols that differ from their Uzbek spelling are replaced;
	    symbols that are already the right letter (b, d, e, ...) need no work.
	    Returns the converted string.
	    """
	    symbol_table = (
	        # j -> y must run before Ǯ -> j so the new j is not rewritten
	        ("j", "y"), ("Ǯ", "j"), ("ʆ", "ch"),
	        ("ǐ", "i"),
	        ("ɣ", "g‘"), ("ŋ", "ng"), ("ʃ", "sh"),
	        ("æ", "a"), ("ɵ", "o‘"), ("ɔ", "o"),
	        # hard sign
	        ("ʔ", "'"),
	    )
	    for symbol, letters in symbol_table:
	        text = text.replace(symbol, letters)

	    # anti-rule 1: [ł] after a front vowel (or the apostrophe of o‘) goes
	    # back to l.  The lookahead also restores a text-final [ł], which
	    # previously leaked into the output unchanged.
	    text = re.sub(r"([aei‘])ł(?![aei‘])", r"\1l", text)

	    # anti-rule 2: [w] before a consonant was originally u.  The hard sign
	    # counts as a consonant in uzbek_to_ipa(), so "'" must be listed here
	    # as well; otherwise "mu'jiza" comes back as "mw'jiza".
	    text = re.sub(r"w([bcvgɣdjzklmnpqrstfhyx'])", r"u\1", text)

	    # anti-rule 3 needs no code: the symbol [i] is already the letter i.
	    return text

	# testing uzbek scripts

	def test_uzbek(text):
	    """Round-trip *text* through uzbek_to_ipa()/ipa_to_uzbek() and print a report.

	    The lower-cased input words are compared with the words that come back
	    from the round trip; on a mismatch the words unique to each side are
	    printed.
	    """
	    expected_words = text.lower().split()
	    actual_words = ipa_to_uzbek(uzbek_to_ipa(text)).split()

	    # words that appear on one side only
	    only_in_input = [word for word in expected_words if word not in actual_words]
	    only_in_output = [word for word in actual_words if word not in expected_words]

	    if expected_words != actual_words:
	        print("input text and output text -- different")
	        print("input:", only_in_input)
	        print("output:", only_in_output)
	        return
	    print("input text and output text -- identical")

	# azerbaijani scripts

	def azerbaijani_to_ipa(text):
	    """Convert Azerbaijani Latin text into the file's IPA-like working alphabet.

	    Letters are substituted in a fixed order and three contextual rules
	    are applied afterwards.  Both letter cases map to the same symbol;
	    characters without a mapping are passed through unchanged.
	    """
	    # (pattern, symbol) pairs, applied strictly in this order.
	    letter_table = (
	        # two-sound convenience consonants (G before Q: Q produces plain g)
	        ("[Cc]", "Ǯ"), ("[Çç]", "ʆ"), ("[Gg]", "ḡ"),
	        # two-sound convenience vowels
	        ("[İi]", "ǐ"), ("[Uu]", "u"),
	        # single-sound consonants (J before Y: Y produces the symbol j)
	        ("[Jj]", "ʒ"), ("[Yy]", "j"), ("[Bb]", "b"), ("[Dd]", "d"), ("[Ff]", "f"),
	        ("[Ğğ]", "ɣ"), ("[Hh]", "h"), ("[Xx]", "x"), ("[Kk]", "k"), ("[Qq]", "g"),
	        ("[Ll]", "l"), ("[Mm]", "m"), ("[Nn]", "n"), ("[Pp]", "p"), ("[Rr]", "r"),
	        ("[Ss]", "s"), ("[Şş]", "ʃ"), ("[Tt]", "t"), ("[Vv]", "v"), ("[Zz]", "z"),
	        # single-sound vowels
	        ("[Aa]", "ɑ"), ("[Ee]", "e"), ("[Əə]", "æ"), ("[Iı]", "ɤ"), ("[Oo]", "ɔ"),
	        ("[Öö]", "ɵ"), ("[Üü]", "ʏ"),
	    )
	    for pattern, symbol in letter_table:
	        text = re.sub(pattern, symbol, text)

	    consonants = "bvgḡɣdʒzklłmnprstfhxʃʆǮ"
	    context_rules = (
	        # rule 1: [l] after a front vowel becomes [ł] unless another front
	        # vowel follows.  NOTE: a following character is required, so an
	        # [l] that ends the whole text is left as [l].
	        (r"([æeɵʏǐ])(l)([^æeɵʏǐ])", r"\1ł\3"),
	        # rule 2: [u] directly before a consonant is written [w].
	        ("u([" + consonants + "])", r"w\1"),
	        # rule 3: [ǐ] directly before a consonant is written [i].
	        ("ǐ([" + consonants + "])", r"i\1"),
	    )
	    for pattern, replacement in context_rules:
	        text = re.sub(pattern, replacement, text)

	    return text

	def ipa_to_azerbaijani(text):
	    """Convert the file's IPA-like working alphabet into Azerbaijani Latin text.

	    Only symbols that differ from their Azerbaijani spelling are replaced;
	    symbols that are already the right letter (b, d, e, ...) need no work.
	    Returns the converted string.
	    """
	    symbol_table = (
	        ("Ǯ", "c"), ("ʆ", "ç"),
	        # g -> q must run before ḡ -> g so the new g is not rewritten
	        ("g", "q"), ("ḡ", "g"),
	        # j -> y must run before ʒ -> j for the same reason
	        ("j", "y"), ("ʒ", "j"),
	        ("ɣ", "ğ"), ("ʃ", "ş"),
	        # vowels
	        ("ɑ", "a"), ("æ", "ə"), ("ɤ", "ı"), ("ǐ", "i"), ("ɔ", "o"), ("ɵ", "ö"),
	        ("ʏ", "ü"),
	    )
	    for symbol, letter in symbol_table:
	        text = text.replace(symbol, letter)

	    # anti-rule 1: [ł] after a front vowel goes back to l.  The lookahead
	    # (instead of a consumed character) also restores a text-final [ł],
	    # which previously leaked into the output unchanged.
	    text = re.sub(r"([əeöüiě])ł(?![əeöüiě])", r"\1l", text)

	    # anti-rule 2: [w] before a consonant was originally u.
	    text = re.sub(r"w([bvgğdjzkqlmnprstfhxşçc])", r"u\1", text)

	    # anti-rule 3 needs no code: the symbol [i] is already the letter i.
	    return text

	# testing azerbaijani scripts

	def test_azerbaijani(text):
	    """Round-trip *text* through azerbaijani_to_ipa()/ipa_to_azerbaijani() and print a report.

	    The lower-cased input words are compared with the words that come back
	    from the round trip; on a mismatch the words unique to each side are
	    printed.
	    """
	    # str.lower() uses the default Unicode mapping: "I" -> "i" and
	    # "İ" -> "i" + U+0307.  The converters map I to ı and İ to i, so
	    # the two capital letters are folded by hand first; otherwise every
	    # word containing them is reported as different.
	    folded = text.replace("\u0130", "i").replace("I", "\u0131").lower()
	    input_text = folded.split()
	    output_text = ipa_to_azerbaijani(azerbaijani_to_ipa(text)).split()

	    input_difference = [word for word in input_text if word not in output_text]
	    output_difference = [word for word in output_text if word not in input_text]

	    if input_text == output_text:
	        print("input text and output text -- identical")
	    else:
	        print("input text and output text -- different")
	        print("input:", input_difference)
	        print("output:", output_difference)

	# turkmen scripts

	def turkmen_to_ipa(text):
	    """Convert Turkmen Latin text into the file's IPA-like working alphabet.

	    Letters are substituted in a fixed order, rules 1-3 are applied, and
	    finally the Turkmen-specific backing of k/g next to back vowels
	    (rules 4-5).  Both letter cases map to the same symbol; unmapped
	    characters are passed through unchanged.
	    """
	    # (pattern, symbol) pairs, applied strictly in this order.
	    letter_table = (
	        # two-sound convenience consonants
	        ("[Çç]", "ʆ"), ("[Jj]", "Ǯ"),
	        # two-sound convenience vowels; plain capital I is included
	        # because it is the upper-case form of i in the Turkmen alphabet
	        # and was previously left unconverted.
	        ("[İIi]", "ǐ"), ("[Uu]", "u"),
	        # single-sound consonants
	        ("[Bb]", "b"), ("[Dd]", "d"), ("[Ff]", "f"), ("[Gg]", "g"), ("[Hh]", "h"),
	        ("[Žž]", "ʒ"), ("[Kk]", "k"), ("[Ll]", "l"), ("[Mm]", "m"), ("[Nn]", "n"),
	        ("[Ňň]", "ŋ"), ("[Pp]", "p"), ("[Rr]", "r"),
	        ("[Ss]", "s"),  # θ
	        ("[Şş]", "ʃ"), ("[Tt]", "t"), ("[Ww]", "v"), ("[Ýý]", "j"),
	        ("[Zz]", "z"),  # ð
	        # single-sound vowels
	        ("[Aa]", "ɑ"), ("[Ää]", "æ"), ("[Ee]", "e"), ("[Oo]", "ɔ"), ("[Öö]", "ɵ"),
	        ("[Üü]", "ʏ"), ("[Yy]", "ɤ"),
	    )
	    for pattern, symbol in letter_table:
	        text = re.sub(pattern, symbol, text)

	    context_rules = (
	        # rule 1: [l] after a front vowel becomes [ł] unless another front
	        # vowel follows.  NOTE: a following character is required, so an
	        # [l] that ends the whole text is left as [l].
	        (r"([æeɵʏǐ])(l)([^æeɵʏǐ])", r"\1ł\3"),
	        # rule 2: [u] directly before a consonant is written [w].
	        (r"u([bvgɣqdʒzkqlłmnprstfhʃʆǮw])", r"w\1"),
	        # rule 3: [ǐ] directly before a consonant is written [i].
	        (r"ǐ([bvgɣqdʒzkqlłmnprstfhʃʆǮ])", r"i\1"),
	        # rule 4: k next to a back vowel (a, o, u, y) becomes q.  By now
	        # a "u" before a consonant is already [w], so a following back
	        # vowel must be matched as either u or w.
	        (r"k([ɑɔuwɤ])", r"q\1"),
	        (r"([ɑɔwɤ])k", r"\1q"),
	        # rule 5: same backing for g -> ɣ (e.g. gul -> [ɣwl]).
	        (r"g([ɑɔuwɤ])", r"ɣ\1"),
	        (r"([ɑɔwɤ])g", r"\1ɣ"),
	    )
	    for pattern, replacement in context_rules:
	        text = re.sub(pattern, replacement, text)

	    return text

	def ipa_to_turkmen(text):
	    """Convert the file's IPA-like working alphabet into Turkmen Latin text.

	    Only symbols that differ from their Turkmen spelling are replaced;
	    symbols that are already the right letter (b, d, e, ...) need no work.
	    Both q and k become k, and both ɣ and g become g.  Returns the
	    converted string.
	    """
	    symbol_table = (
	        # j -> ý must run before Ǯ -> j so the new j is not rewritten
	        ("j", "ý"), ("Ǯ", "j"), ("ʆ", "ç"),
	        ("ɣ", "g"), ("ʒ", "ž"), ("q", "k"), ("ŋ", "ň"), ("ʃ", "ş"),
	        # vowels
	        ("ɑ", "a"), ("æ", "ä"), ("ǐ", "i"), ("ɔ", "o"), ("ɵ", "ö"), ("ʏ", "ü"),
	        ("ɤ", "y"),
	    )
	    for symbol, letter in symbol_table:
	        text = text.replace(symbol, letter)

	    # anti-rule 1: [ł] after a front vowel goes back to l.  The lookahead
	    # (instead of a consumed character) also restores a text-final [ł],
	    # which previously leaked into the output unchanged.
	    text = re.sub(r"([äeöüiě])ł(?![äeöüiě])", r"\1l", text)

	    # anti-rule 2: [w] before a consonant was originally u.  This has to
	    # happen before v is turned into the letter w, otherwise the two
	    # kinds of "w" could no longer be told apart.
	    text = re.sub(r"w([bdfghžklmnňprsştýzjçɣqv])", r"u\1", text)
	    text = text.replace("v", "w")

	    # anti-rule 3 needs no code: the symbol [i] is already the letter i.
	    return text

	# testing turkmen scripts

	def test_turkmen(text):
	    """Round-trip *text* through turkmen_to_ipa()/ipa_to_turkmen() and print a report.

	    The lower-cased input words are compared with the words that come back
	    from the round trip; on a mismatch the words unique to each side are
	    printed.
	    """
	    expected_words = text.lower().split()
	    actual_words = ipa_to_turkmen(turkmen_to_ipa(text)).split()

	    # words that appear on one side only
	    only_in_input = [word for word in expected_words if word not in actual_words]
	    only_in_output = [word for word in actual_words if word not in expected_words]

	    if expected_words != actual_words:
	        print("input text and output text -- different")
	        print("input:", only_in_input)
	        print("output:", only_in_output)
	        return
	    print("input text and output text -- identical")

	# tatar scripts

	def tatar_to_ipa(text):
	    """Convert Tatar Cyrillic text into the file's IPA-like working alphabet.

	    Letters are substituted in a fixed order, rules 1-4 (shared with
	    Kazakh) are applied, and finally the Tatar-specific backing of k/g
	    next to back vowels and the hard sign (rules 5-6).  Both letter cases
	    map to the same symbol; unmapped characters are passed through
	    unchanged.
	    """
	    # (pattern, symbol) pairs, applied strictly in this order.
	    letter_table = (
	        # three-sound convenience vowel
	        ("[Юю]", "ǔ"),
	        # two-sound convenience consonants
	        ("[Цц]", "š"), ("[Чч]", "ʆ"), ("[Җҗ]", "Ǯ"),
	        # two-sound convenience vowels
	        ("[Яя]", "ǎ"), ("[Ее]", "ě"), ("[Ёё]", "ǒ"), ("[Ии]", "ǐ"), ("[Уу]", "u"),
	        # single-sound consonants
	        ("[Бб]", "b"), ("[Вв]", "v"), ("[Гг]", "g"), ("[Дд]", "d"), ("[Жж]", "ʒ"),
	        ("[Зз]", "z"), ("[Йй]", "j"), ("[Кк]", "k"), ("[Лл]", "l"), ("[Мм]", "m"),
	        ("[Нн]", "n"), ("[Ңң]", "ŋ"), ("[Пп]", "p"), ("[Рр]", "r"), ("[Сс]", "s"),
	        ("[Тт]", "t"), ("[Фф]", "f"), ("[Хх]", "x"), ("[Һһ]", "h"), ("[Шш]", "ʃ"),
	        ("[Щщ]", "ɕ"), ("[Ъъ]", "ʔ"), ("[Ьь]", "ʲ"),
	        # single-sound vowels
	        ("[Аа]", "ɑ"), ("[Әә]", "æ"), ("[Оо]", "ɔ"), ("[Өө]", "ɵ"), ("[Үү]", "ʏ"),
	        ("[Ыы]", "ɤ"), ("[Ээ]", "e"),
	    )
	    for pattern, symbol in letter_table:
	        text = re.sub(pattern, symbol, text)

	    consonants = "bvgɣdʒzjkqlłmnŋprstfxhʃɕʔšʆǮʲ"
	    context_rules = (
	        # rule 1: [l] after [æ], [ě], [ɵ], [ʏ] becomes [ł] unless one of
	        # those vowels or [ʲ] follows.  NOTE: a following character is
	        # required, so an [l] that ends the whole text is left as [l].
	        (r"([æɵʏě])(l)([^æɵʏěʲ])", r"\1ł\3"),
	        # rule 2: word-initial [ɔ]/[ɵ] are preceded by [w].
	        (r"\b([ɔɵ])", r"w\1"),
	        # rule 3: [u] directly before a consonant is written [w].
	        ("u([" + consonants + "])", r"w\1"),
	        # rule 4: [ǐ] directly before a consonant is written [i].
	        ("ǐ([" + consonants + "])", r"i\1"),
	        # rule 5: к next to а, о, у, ы, ъ becomes q.  A following "у" is
	        # [w] before a consonant but stays [u] elsewhere, so both forms
	        # are matched (e.g. кул -> [qwl], куа -> [quɑ]).
	        (r"k([ɑɔuwɤʔ])", r"q\1"),
	        (r"([ɑɔwɤʔ])k", r"\1q"),
	        # rule 6: same backing for г -> ɣ.
	        (r"g([ɑɔuwɤʔ])", r"ɣ\1"),
	        (r"([ɑɔwɤʔ])g", r"\1ɣ"),
	    )
	    for pattern, replacement in context_rules:
	        text = re.sub(pattern, replacement, text)

	    return text

	def ipa_to_tatar(text):
	    """Convert the file's IPA-like working alphabet into Tatar Cyrillic.

	    Symbols are mapped back to letters (both k/q become к and both g/ɣ
	    become г), then the contextual rules of tatar_to_ipa() are undone.
	    Returns the converted string.
	    """
	    symbol_table = (
	        # three-sound convenience vowel
	        ("ǔ", "ю"),
	        # two-sound convenience consonants
	        ("š", "ц"), ("ʆ", "ч"), ("Ǯ", "җ"),
	        # two-sound convenience vowels
	        ("ǎ", "я"), ("ě", "е"), ("ǒ", "ё"), ("ǐ", "и"), ("u", "у"),
	        # single-sound consonants
	        ("b", "б"), ("v", "в"), ("g", "г"), ("ɣ", "г"), ("d", "д"), ("ʒ", "ж"),
	        ("z", "з"), ("j", "й"), ("k", "к"), ("l", "л"), ("m", "м"), ("n", "н"),
	        ("ŋ", "ң"), ("p", "п"), ("q", "к"), ("r", "р"), ("s", "с"), ("t", "т"),
	        ("f", "ф"), ("x", "х"), ("h", "һ"), ("ʃ", "ш"), ("ɕ", "щ"), ("ʔ", "ъ"),
	        ("ʲ", "ь"),
	        # single-sound vowels
	        ("ɑ", "а"), ("æ", "ә"), ("ɔ", "о"), ("ɵ", "ө"), ("ʏ", "ү"), ("ɤ", "ы"),
	        ("e", "э"),
	    )
	    for symbol, letter in symbol_table:
	        text = text.replace(symbol, letter)

	    consonants = "бвгдзйклмнңпрстфхһцчшщъьчцжҗqɣ"

	    # anti-rule 1: [ł] after ә/ө/ү/е goes back to л.  The lookahead
	    # (rather than a consumed character) also restores a text-final [ł],
	    # which previously leaked into the output unchanged.
	    text = re.sub(r"([әөүе])ł(?![әөүеʲ])", r"\1л", text)

	    # anti-rule 2: drop the [w] inserted before word-initial о/ө.
	    text = re.sub(r"\bw([оө])", r"\1", text)

	    # anti-rule 3: [w] before a consonant was originally у.
	    text = re.sub("w([" + consonants + "])", r"у\1", text)

	    # anti-rule 4: [i] before a consonant was originally и.
	    text = re.sub("i([" + consonants + "])", r"и\1", text)

	    return text

	# testing tatar scripts

	def test_tatar(text):
		"""Round-trip ``text`` through tatar_to_ipa() and ipa_to_tatar() and print a report.

		The lower-cased, whitespace-split input is compared with the
		whitespace-split round-trip result. When the two word lists differ,
		the words found on only one side are printed.
		"""
		source_words = text.lower().split()
		restored_words = ipa_to_tatar(tatar_to_ipa(text)).split()

		if source_words == restored_words:
			print("input text and output text -- identical")
			return

		restored_set = set(restored_words)
		source_set = set(source_words)
		only_in_input = [word for word in source_words if word not in restored_set]
		only_in_output = [word for word in restored_words if word not in source_set]

		print("input text and output text -- different")
		print("input:", only_in_input)
		print("output:", only_in_output)

	# bashkir scripts

	def bashkir_to_ipa(text):
		"""Convert Bashkir Cyrillic text to this module's single-symbol IPA notation.

		Upper- and lower-case letters map to the same symbol. Multi-sound
		letters are written with one convenience symbol each (e.g. "ю" -> ǔ).
		"ҙ" and "з" both become [z]; "ҫ" and "с" both become [s]. Characters
		that are not listed are left unchanged.

		Args:
			text: Bashkir text in Cyrillic script.

		Returns:
			The text in convenience-IPA notation.
		"""
		# Letter substitutions, applied in this order: sound combinations first,
		# then consonants, then vowels.
		letter_subs = (
			# three-sound convenience vowels
			("[Юю]", "ǔ"),
			# two-sound convenience consonants
			("[Цц]", "š"), ("[Чч]", "ʆ"),
			# two-sound convenience vowels
			("[Яя]", "ǎ"), ("[Ее]", "ě"), ("[Ёё]", "ǒ"), ("[Ии]", "ǐ"),
			("[Уу]", "u"),
			# single-sound consonants
			("[Бб]", "b"), ("[Вв]", "v"), ("[Гг]", "g"), ("[Ғғ]", "ɣ"),
			("[Дд]", "d"), ("[Ҙҙ]", "z"), ("[Жж]", "ʒ"), ("[Зз]", "z"),
			("[Йй]", "j"), ("[Кк]", "k"), ("[Ҡҡ]", "q"), ("[Лл]", "l"),
			("[Мм]", "m"), ("[Нн]", "n"), ("[Ңң]", "ŋ"), ("[Пп]", "p"),
			("[Рр]", "r"), ("[Сс]", "s"), ("[Ҫҫ]", "s"), ("[Тт]", "t"),
			("[Хх]", "x"), ("[Фф]", "f"), ("[Һһ]", "h"), ("[Шш]", "ʃ"),
			("[Щщ]", "ɕ"), ("[Ъъ]", "ʔ"), ("[Ьь]", "ʲ"),
			# single-sound vowels
			("[Аа]", "ɑ"), ("[Әә]", "æ"), ("[Оо]", "ɔ"), ("[Өө]", "ɵ"),
			("[Үү]", "ʏ"), ("[Ыы]", "ɤ"), ("[Ээ]", "e"),
		)
		for pattern, symbol in letter_subs:
			text = re.sub(pattern, symbol, text)

		# Context rules (similar to those for Kazakh), applied in this order.
		context_rules = (
			# rule 1: [l] after [æ], [ɵ], [ʏ] or [ě] becomes [ł] when the next
			# character is none of those vowels and not [ʲ].
			(r"([æɵʏě])(l)([^æɵʏěʲ])", r"\1ł\3"),
			# rule 2: a word-initial [ɔ] or [ɵ] is preceded by [w].
			(r"\b([ɔɵ])", r"w\1"),
			# rule 3: [u] in front of a consonant becomes [w].
			(r"u([bvgɣdʒzjkqlłmnŋprstfxhʃɕʔšʆʲ])", r"w\1"),
			# rule 4: [ǐ] in front of a consonant becomes [i].
			(r"ǐ([bvgɣdʒzjkqlłmnŋprstfxhʃɕʔšʆʲ])", r"i\1"),
		)
		for pattern, replacement in context_rules:
			text = re.sub(pattern, replacement, text)

		return text

	def ipa_to_bashkir(text):
		"""Convert this module's single-symbol IPA notation to Bashkir Cyrillic.

		Every known IPA symbol is replaced by a lower-case Cyrillic letter;
		characters that are not part of the notation are left untouched.
		[z] is always spelled "з" and [s] is always spelled "с".
		[ł] is always spelled "л", including when it is the last character of
		the text (the previous context-dependent rule left it unconverted there).

		Args:
			text: string in the convenience-IPA notation.

		Returns:
			The Cyrillic spelling of ``text``.
		"""
		# None of the Cyrillic output letters is itself an IPA input symbol, so
		# the context-free part of the conversion can be done in a single pass.
		to_cyrillic = str.maketrans({
			# three-sound convenience vowels
			"ǔ": "ю",
			# two-sound convenience consonants
			"š": "ц", "ʆ": "ч",
			# two-sound convenience vowels
			"ě": "е", "ǒ": "ё", "ǐ": "и", "u": "у", "ǎ": "я",
			# single-sound consonants
			"b": "б", "v": "в", "g": "г", "ɣ": "ғ", "d": "д", "z": "з",
			"ʒ": "ж", "j": "й", "k": "к", "q": "ҡ", "l": "л", "m": "м",
			"n": "н", "ŋ": "ң", "p": "п", "r": "р", "s": "с", "t": "т",
			"f": "ф", "x": "х", "h": "һ", "ʃ": "ш", "ɕ": "щ", "ʔ": "ъ",
			"ʲ": "ь",
			# single-sound vowels
			"ɑ": "а", "æ": "ә", "ɔ": "о", "ɵ": "ө", "ʏ": "ү", "ɤ": "ы",
			"e": "э",
			# anti-rule 1: [ł] is only ever a variant of "л"
			"ł": "л",
		})
		text = text.translate(to_cyrillic)

		# anti-rule 2: drop the [w] written before a word-initial "о" / "ө".
		text = re.sub(r"\bw([оө])", r"\1", text)

		# anti-rule 3: [w] in front of a consonant stands for the letter "у".
		text = re.sub(r"w([бвгғдзйкҡлмнңпрстфхһцчшщъьж])", r"у\1", text)

		# anti-rule 4: [i] in front of a consonant stands for the letter "и".
		text = re.sub(r"i([бвгғдзйкҡлмнңпрстфхһцчшщъьж])", r"и\1", text)

		return text

	# testing bashkir scripts

	def test_bashkir(text):
		"""Round-trip ``text`` through bashkir_to_ipa() and ipa_to_bashkir() and print a report.

		The lower-cased, whitespace-split input is compared with the
		whitespace-split round-trip result. When the two word lists differ,
		the words found on only one side are printed.
		"""
		source_words = text.lower().split()
		restored_words = ipa_to_bashkir(bashkir_to_ipa(text)).split()

		if source_words == restored_words:
			print("input text and output text -- identical")
			return

		restored_set = set(restored_words)
		source_set = set(source_words)
		only_in_input = [word for word in source_words if word not in restored_set]
		only_in_output = [word for word in restored_words if word not in source_set]

		print("input text and output text -- different")
		print("input:", only_in_input)
		print("output:", only_in_output)

	# sakha scripts

	def sakha_to_ipa(text):
		"""Convert Sakha Cyrillic text to this module's single-symbol IPA notation.

		Upper- and lower-case letters map to the same symbol. Multi-sound
		letters and the digraphs "дь" / "нь" (in any capitalisation) are
		written with one convenience symbol each. Characters that are not
		listed are left unchanged.

		Args:
			text: Sakha text in Cyrillic script.

		Returns:
			The text in convenience-IPA notation.
		"""
		# Letter substitutions, applied in this order: sound combinations first
		# (so the digraphs are seen before "д", "н" and "ь" are converted on
		# their own), then consonants, then vowels.
		letter_subs = (
			# three-sound convenience vowels
			("[Юю]", "ǔ"),
			# two-sound convenience consonants
			("[Цц]", "š"), ("[Чч]", "ʆ"),
			("ДЬ", "Ǯ"), ("дь", "Ǯ"), ("Дь", "Ǯ"), ("дЬ", "Ǯ"),
			("НЬ", "ɲ"), ("нь", "ɲ"), ("Нь", "ɲ"), ("нЬ", "ɲ"),
			# two-sound convenience vowels
			("[Яя]", "ǎ"), ("[Ее]", "ě"), ("[Ёё]", "ǒ"), ("[Ии]", "ǐ"),
			("[Уу]", "u"),
			# single-sound consonants
			("[Бб]", "b"), ("[Вв]", "v"), ("[Гг]", "g"), ("[Ҕҕ]", "ɣ"),
			("[Дд]", "d"), ("[Жж]", "ʒ"), ("[Зз]", "z"), ("[Йй]", "j"),
			("[Кк]", "k"), ("[Лл]", "l"), ("[Мм]", "m"), ("[Нн]", "n"),
			("[Ҥҥ]", "ŋ"), ("[Пп]", "p"), ("[Рр]", "r"), ("[Сс]", "s"),
			("[Тт]", "t"), ("[Хх]", "x"), ("[Фф]", "f"), ("[Һһ]", "h"),
			("[Шш]", "ʃ"), ("[Щщ]", "ɕ"), ("[Ъъ]", "ʔ"), ("[Ьь]", "ʲ"),
			# single-sound vowels
			("[Аа]", "ɑ"), ("[Әә]", "æ"), ("[Оо]", "ɔ"), ("[Өө]", "ɵ"),
			("[Үү]", "ʏ"), ("[Ыы]", "ɤ"), ("[Ээ]", "e"),
		)
		for pattern, symbol in letter_subs:
			text = re.sub(pattern, symbol, text)

		# Context rules (similar to those for Kazakh), applied in this order.
		context_rules = (
			# rule 1: [l] after [æ], [ɵ], [ʏ] or [ě] becomes [ł] when the next
			# character is none of those vowels and not [ʲ].
			(r"([æɵʏě])(l)([^æɵʏěʲ])", r"\1ł\3"),
			# rule 2: a word-initial [ɔ] or [ɵ] is preceded by [w].
			(r"\b([ɔɵ])", r"w\1"),
			# rule 3: [u] in front of a consonant becomes [w].
			(r"u([bvgɣdʒzjklłmnŋɲprstfxhʃɕʔšʆǮʲ])", r"w\1"),
			# rule 4: [ǐ] in front of a consonant becomes [i].
			(r"ǐ([bvgɣdʒzjklłmnŋɲprstfxhʃɕʔšʆǮʲ])", r"i\1"),
		)
		for pattern, replacement in context_rules:
			text = re.sub(pattern, replacement, text)

		return text

	def ipa_to_sakha(text):
		"""Convert this module's single-symbol IPA notation to Sakha Cyrillic.

		Every known IPA symbol is replaced by lower-case Cyrillic; [Ǯ] and [ɲ]
		are spelled with the digraphs "дь" and "нь". Characters that are not
		part of the notation are left untouched.
		[ł] is always spelled "л", including when it is the last character of
		the text (the previous context-dependent rule left it unconverted there).

		Args:
			text: string in the convenience-IPA notation.

		Returns:
			The Cyrillic spelling of ``text``.
		"""
		# None of the Cyrillic output letters is itself an IPA input symbol, so
		# the context-free part of the conversion can be done in a single pass.
		to_cyrillic = str.maketrans({
			# three-sound convenience vowels
			"ǔ": "ю",
			# two-sound convenience consonants
			"š": "ц", "ʆ": "ч", "Ǯ": "дь", "ɲ": "нь",
			# two-sound convenience vowels
			"ě": "е", "ǒ": "ё", "ǐ": "и", "u": "у", "ǎ": "я",
			# single-sound consonants
			"b": "б", "v": "в", "g": "г", "ɣ": "ҕ", "d": "д", "z": "з",
			"ʒ": "ж", "j": "й", "k": "к", "l": "л", "m": "м", "n": "н",
			"ŋ": "ҥ", "p": "п", "r": "р", "s": "с", "t": "т", "f": "ф",
			"x": "х", "h": "һ", "ʃ": "ш", "ɕ": "щ", "ʔ": "ъ", "ʲ": "ь",
			# single-sound vowels
			"ɑ": "а", "æ": "ә", "ɔ": "о", "ɵ": "ө", "ʏ": "ү", "ɤ": "ы",
			"e": "э",
			# anti-rule 1: [ł] is only ever a variant of "л"
			"ł": "л",
		})
		text = text.translate(to_cyrillic)

		# anti-rule 2: drop the [w] written before a word-initial "о" / "ө".
		text = re.sub(r"\bw([оө])", r"\1", text)

		# anti-rule 3: [w] in front of a consonant stands for the letter "у".
		# The digraphs "дь" / "нь" start with "д" / "н", so the class covers them.
		text = re.sub(r"w([бвгҕдзйклмнҥпрстфхһцчшщъьж])", r"у\1", text)

		# anti-rule 4: [i] in front of a consonant stands for the letter "и".
		text = re.sub(r"i([бвгҕдзйклмнҥпрстфхһцчшщъьж])", r"и\1", text)

		return text

	# testing sakha scripts

	def test_sakha(text):
		"""Round-trip ``text`` through sakha_to_ipa() and ipa_to_sakha() and print a report.

		The lower-cased, whitespace-split input is compared with the
		whitespace-split round-trip result. When the two word lists differ,
		the words found on only one side are printed.
		"""
		source_words = text.lower().split()
		restored_words = ipa_to_sakha(sakha_to_ipa(text)).split()

		if source_words == restored_words:
			print("input text and output text -- identical")
			return

		restored_set = set(restored_words)
		source_set = set(source_words)
		only_in_input = [word for word in source_words if word not in restored_set]
		only_in_output = [word for word in restored_words if word not in source_set]

		print("input text and output text -- different")
		print("input:", only_in_input)
		print("output:", only_in_output)

	# uyghur scripts

	def uyghur_to_ipa(text):
		"""Convert Uyghur Latin-script text to this module's single-symbol IPA notation.

		Upper- and lower-case letters map to the same symbol. The digraphs
		"ch", "gh", "ng", "sh" and "zh" are recognised in any capitalisation
		(previously "CH"/"cH" and "GH"/"gH" were left as two separate
		letters). The apostrophe is written as [ʔ]. Characters that are not
		listed are left unchanged.

		Args:
			text: Uyghur text in Latin script.

		Returns:
			The text in convenience-IPA notation.
		"""
		# Digraphs go first so that their letters are not converted one by one.
		# "gh" is handled before "ng", so "ngh" reads as "n" + "gh".
		digraphs = (
			("[Cc][Hh]", "ʆ"),
			("[Gg][Hh]", "ɣ"),
			("[Nn][Gg]", "ŋ"),
			("[Ss][Hh]", "ʃ"),
			("[Zz][Hh]", "ʒ"),
		)
		for pattern, symbol in digraphs:
			text = re.sub(pattern, symbol, text)

		# Single letters are converted in one pass, so "j" -> [Ǯ] and
		# "y" -> [j], or "e" -> [æ] and "ë" -> [e], cannot interfere.
		single_letters = (
			# two-sound convenience consonants
			("Jj", "Ǯ"),
			# two-sound convenience vowels
			("Ii", "ǐ"), ("Uu", "u"),
			# single-sound consonants
			("Bb", "b"), ("Dd", "d"), ("Ff", "f"), ("Gg", "g"), ("Hh", "h"),
			("Kk", "k"), ("Ll", "l"), ("Mm", "m"), ("Nn", "n"), ("Pp", "p"),
			("Qq", "q"), ("Rr", "r"), ("Ss", "s"), ("Tt", "t"), ("Ww", "v"),
			("Xx", "x"), ("Yy", "j"), ("Zz", "z"),
			# single-sound vowels
			("Aa", "ɑ"), ("Ee", "æ"), ("ËÉëé", "e"), ("Oo", "ɔ"),
			("Öö", "ɵ"), ("Üü", "ʏ"),
			# hard sign
			("'", "ʔ"),
		)
		to_ipa = {
			ord(letter): symbol
			for letters, symbol in single_letters
			for letter in letters
		}
		text = text.translate(to_ipa)

		# rule 1: [l] after [æ], [ɵ], [ǐ], [e] or [ʏ] becomes [ł] when the next
		# character is none of those vowels.
		text = re.sub(r"([æɵǐeʏ])(l)([^æɵǐeʏ])", r"\1ł\3", text)

		# rule 2: [u] in front of a consonant becomes [w].
		text = re.sub(r"u([bvgɣdzjkqlłmnŋprstfxhʃʆǮʒ])", r"w\1", text)

		# rule 3: [ǐ] in front of a consonant becomes [i].
		text = re.sub(r"ǐ([bvgɣdzjkqlłmnŋprstfxhʃʆǮʒ])", r"i\1", text)

		return text

	def ipa_to_uyghur(text):
		"""Convert this module's single-symbol IPA notation to Uyghur Latin script.

		Every known IPA symbol is replaced by its lower-case Latin spelling;
		characters that are not part of the notation are left untouched.

		Two defects of the previous implementation are fixed:

		* [w] (written by uyghur_to_ipa() for "u" before a consonant) is turned
		  back into "u" *before* [v] is spelled "w". Previously the rule ran on
		  the Latin text, so a genuine "w" before a consonant became "u"
		  ("tewsiye" came back as "teusiye") and the [w] in [wv] was never
		  restored ("uwa" came back as "wwa").
		* [ł] is always spelled "l", including at the very end of the text.

		Args:
			text: string in the convenience-IPA notation.

		Returns:
			The Latin-script spelling of ``text``.
		"""
		# anti-rule 2, applied while the text is still IPA: [w] in front of a
		# consonant stands for the letter "u".
		text = re.sub(r"w([bvgɣdzjkqlłmnŋprstfxhʃʆǮʒ])", r"u\1", text)

		# The remaining conversions do not depend on context. They are done in
		# one pass so that the "j" produced from [Ǯ] is not turned into "y" and
		# the "e" produced from [æ] is not turned into "ë". Symbols spelled the
		# same in both notations (b, d, f, ...) need no entry, and [i] is
		# already the Latin letter "i" (anti-rule 3).
		to_latin = str.maketrans({
			# two-sound convenience consonants
			"j": "y", "Ǯ": "j", "ʆ": "ch", "ʒ": "zh",
			# two-sound convenience vowels
			"ǐ": "i",
			# single-sound consonants
			"ɣ": "gh", "ŋ": "ng", "ʃ": "sh", "v": "w",
			# single-sound vowels
			"ɑ": "a", "e": "ë", "æ": "e", "ɵ": "ö", "ɔ": "o", "ʏ": "ü",
			# hard sign
			"ʔ": "'",
			# anti-rule 1: [ł] is only ever a variant of "l"
			"ł": "l",
		})
		return text.translate(to_latin)

	# testing uyghur scripts

	def test_uyghur(text):
		"""Round-trip ``text`` through uyghur_to_ipa() and ipa_to_uyghur() and print a report.

		The lower-cased, whitespace-split input is compared with the
		whitespace-split round-trip result. When the two word lists differ,
		the words found on only one side are printed.
		"""
		source_words = text.lower().split()
		restored_words = ipa_to_uyghur(uyghur_to_ipa(text)).split()

		if source_words == restored_words:
			print("input text and output text -- identical")
			return

		restored_set = set(restored_words)
		source_set = set(source_words)
		only_in_input = [word for word in source_words if word not in restored_set]
		only_in_output = [word for word in restored_words if word not in source_set]

		print("input text and output text -- different")
		print("input:", only_in_input)
		print("output:", only_in_output)