DebasishDhal99
committed on
Commit
•
53bd534
1
Parent(s):
34a0211
Create hungarian.py
Browse files- hungarian.py +60 -0
hungarian.py
ADDED
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Hungarian multi-letter combinations and what each one is replaced with by
# check_special_comb().
#
# Most values are single Cyrillic placeholder characters rather than final
# Latin text, so that the per-letter pass over hungarian_dict (which rewrites
# e.g. "j") cannot touch them; cyrillic_to_eng() is meant to expand the
# placeholders afterwards.
#
# Order matters: check_special_comb() applies the entries in dict iteration
# order, so the three-letter "dzs" must stay ahead of "dz" and "zs".
special_combs = {
    "Dzs": "Ј", "dzs": "ј",  # values are Cyrillic Je, not Latin J/j
    "Dz": "Ъ", "dz": "ъ",  # Actually the sound of ds in kids
    "Cs": "Ч", "cs": "ч",
    "Zs": "Ж", "zs": "ж",
    "Sz": "S", "sz": "s",
    # Capitalised "Ly" keeps its capital, matching "J" -> "Y" in hungarian_dict.
    "Ly": "Y", "ly": "y",
}
|
9 |
+
|
10 |
+
# Single-character replacements applied letter by letter by
# hungarian_letter_to_eng(). Characters that are not listed here are left
# unchanged by that function.
hungarian_dict = {
    # Acute-accented vowels lose their accent.
    "á": "a",
    "Á": "A",
    "é": "e",
    "É": "E",
    "í": "i",
    "Í": "I",
    "ó": "o",
    "Ó": "O",
    # NOTE(review): ö/ő are mapped to "a", not "o" - confirm this is intended.
    "ö": "a",
    "Ö": "A",
    "ő": "a",
    "Ő": "A",
    "ú": "u",
    "Ú": "U",
    # ü/ű become the Cyrillic placeholder ю/Ю (listed in cyrillic_equiv_dict).
    "ü": "ю",
    "Ü": "Ю",
    "ű": "ю",
    "Ű": "Ю",
    "j": "y",
    "J": "Y",
}
|
22 |
+
|
23 |
+
# Expansion of the Cyrillic placeholder characters into their final Latin
# spelling; applied by cyrillic_to_eng() as the last transliteration step.
cyrillic_equiv_dict = {
    # Hard sign Ъ/ъ is the placeholder special_combs emits for "Dz"/"dz".
    # The capital entry must be keyed on "Ъ" (not the soft sign "ь"),
    # otherwise a capital "Dz" is left in the output as a raw "Ъ".
    "ъ": "ds", "Ъ": "Ds",
    "ч": "ch", "Ч": "Ch",
    "ж": "zh", "Ж": "Zh",
    "ш": "sh", "Ш": "Sh",
    "ј": "j", "Ј": "J",  # keys are Cyrillic Je, values are Latin j/J
    "ю": "yu", "Ю": "Yu",
}
|
31 |
+
|
32 |
+
def check_special_comb(word):
    """Replace every multi-letter combination listed in ``special_combs``.

    The table is walked in its iteration order and each combination that
    occurs in ``word`` is substituted everywhere by its mapped value.

    Args:
        word: Text to process.

    Returns:
        The text with all listed combinations replaced.
    """
    for combination, replacement in special_combs.items():
        if combination not in word:
            continue
        word = word.replace(combination, replacement)
    return word
|
37 |
+
|
38 |
+
def hungarian_letter_to_eng(letter):
    """Return the ``hungarian_dict`` replacement for a single letter.

    Args:
        letter: The character to look up.

    Returns:
        The mapped value when ``letter`` is a key of ``hungarian_dict``;
        otherwise ``letter`` itself, unchanged.
    """
    return hungarian_dict.get(letter, letter)
|
43 |
+
|
44 |
+
def cyrillic_to_eng(word):
    """Expand the characters listed in ``cyrillic_equiv_dict`` inside ``word``.

    Each key of the table that occurs in ``word`` is replaced everywhere by
    its mapped value; keys are processed in the table's iteration order.

    Args:
        word: Text to process.

    Returns:
        The text with every listed character replaced.
    """
    for placeholder, latin in cyrillic_equiv_dict.items():
        if placeholder not in word:
            continue
        word = word.replace(placeholder, latin)
    return word
|
49 |
+
|
50 |
+
|
51 |
+
def hungarian_word_to_eng(word):
    """Convert Hungarian text into a simplified pronunciation spelling.

    The conversion runs in three passes:
      1. multi-letter combinations are replaced via ``check_special_comb``;
      2. every character is mapped through ``hungarian_letter_to_eng``;
      3. the result is passed through ``cyrillic_to_eng``.

    Args:
        word: The text to convert; must be exactly of type ``str``.

    Returns:
        The converted string.

    Raises:
        AssertionError: If ``word`` is not a ``str``. Being an ``assert``,
            this check is skipped when Python runs with ``-O``.
    """
    assert type(word) is str, "Input must be a string"
    with_combinations = check_special_comb(word)
    per_letter = "".join(map(hungarian_letter_to_eng, with_combinations))
    return cyrillic_to_eng(per_letter)
|