Spaces:

DebasishDhal99
/

The-Language-Transliteration-Project

Running

App Files Files Community

DebasishDhal99 commited on Aug 26, 2023

Commit

6cfbfef

•

1 Parent(s): 2e3d8ee

Create turkish.py

Browse files

Files changed (1) hide show

turkish.py +67 -0

turkish.py ADDED Viewed

	@@ -0,0 +1,67 @@

+special_combs = {"c" : "ј", "C" : "Ј"} #These are Serbian J characters, they will be later converted to Latin J.
+turkish_dict = {
+"ç" : "ch" , "Ç" : "Ch",
+"ğ" : "'" , "Ğ" : "'",
+"ı" : "e" , "I": "E",
+"i" : "i" , "İ": "I",
+"j" : "zh" , "J": "zh",
+"ö" : "o" , "Ö" : "O",
+"ş" : "sh" , "Ş" : "Sh",
+"ü" : "yu" , "Ü" : "Yu",
+"w" : "v" , "W" : "V",
+}
+cyrillic_equiv_dict = {
+    "ј" : "j" , "Ј" : "J",
+    "ў" : "w"
+}
+def check_special_comb(word):
+    for comb in special_combs:
+        if comb in word:
+            word = word.replace(comb,special_combs[comb])
+    return word
+def cyrillic_to_eng(word):
+    for cyrillic in cyrillic_equiv_dict:
+        if cyrillic in word:
+            word = word.replace(cyrillic,cyrillic_equiv_dict[cyrillic])
+    return word
+def turkish_letter_to_eng(letter):
+    if letter in turkish_dict:
+        return turkish_dict[letter]
+    else:
+        return letter
+def turkish_word_to_latin(word):
+    assert type(word)==str
+    word = check_special_comb(word)
+    if word.endswith("ı"):
+        word = word[:-1] + "aў"
+    if word.endswith("er"):
+        word = word[:-2]+"ar"
+    word = check_special_comb(word)
+    word = ''.join([turkish_letter_to_eng(letter) for letter in word])
+    word = cyrillic_to_eng(word)
+    return word
+def turkish_sentence_to_latin(sentence):
+    word_list = word_tokenize(sentence)
+    processed_word_list = []
+    for word in word_list:
+        try:
+            input_word = word
+            processed_word_list.append(turkish_word_to_latin(word))
+        except:
+            processed_word_list.append(input_word)
+    return " ".join(processed_word_list)