DebasishDhal99
committed on
Commit
·
8047fd0
1
Parent(s):
a077c2b
Create polish.py
Browse files
polish.py
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Polish letter combinations (plus a few single letters) mapped to
# one-character placeholders, mostly Cyrillic letters. cyrillic_equiv_dict
# maps those placeholders on to their final Latin spelling.
# NOTE(review): the key order looks deliberate - longer combinations such as
# 'szcz' are listed before the shorter ones they contain ('sz', 'cz', 'c').
# Presumably the consumer replaces keys in insertion order; keep this order
# unless that is confirmed not to matter.
special_combs = {
    "szcz": "щ", "Szcz": "Щ",
    "ch": "х", "Ch": "Х",
    "cz": "ч", "Cz": "Ч",
    "dz": "ϳ", "Dz": "Ј",
    "dź": "ϳ", "Dź": "Ј",
    "dż": "ϳ", "Dż": "Ј",
    "rz": "ж", "Rz": "Ж",
    "sz": "ш", "Sz": "Ш",
    "ż": "ж", "Ż": "Ж",
    "ź": "ж", "Ź": "Ж",
    "c": "ц", "C": "Ц",
}
|
3 |
+
|
4 |
+
# Single Polish letters mapped to their Latin spelling, one lower/upper-case
# pair per line. The letters c, ż and ź are not listed here; they appear in
# special_combs instead.
polish_dict = {
    "a": "a", "A": "A",
    "ą": "an", "Ą": "An",
    "b": "b", "B": "B",
    "ć": "ch", "Ć": "Ch",
    "d": "d", "D": "D",
    "e": "e", "E": "E",
    "ę": "en", "Ę": "En",
    "f": "f", "F": "F",
    "g": "g", "G": "G",
    "h": "h", "H": "H",
    "i": "i", "I": "I",
    "j": "y", "J": "Y",
    "k": "k", "K": "K",
    "l": "l", "L": "L",
    "ł": "w", "Ł": "W",
    "m": "m", "M": "M",
    "n": "n", "N": "N",
    "ń": "ny", "Ń": "Ny",
    "o": "o", "O": "O",
    "ó": "u", "Ó": "U",
    "p": "p", "P": "P",
    "r": "r", "R": "R",
    "s": "s", "S": "S",
    "ś": "sh", "Ś": "Sh",
    "t": "t", "T": "T",
    "u": "u", "U": "U",
    "w": "v", "W": "V",
    "y": "y", "Y": "Y",
    "z": "z", "Z": "Z",
}
|
9 |
+
|
10 |
+
# Latin spelling for each one-character placeholder that special_combs
# produces; cyrillic_to_eng applies this table.
cyrillic_equiv_dict = {
    "щ": "sh", "Щ": "Sh",
    "х": "kh", "Х": "Kh",
    "ч": "ch", "Ч": "Ch",
    "ϳ": "j", "Ј": "J",
    "ж": "zh", "Ж": "Zh",
    "ш": "sh", "Ш": "Sh",
    "ц": "ts", "Ц": "Ts",
}
|
12 |
+
|
13 |
+
def polish_letter_to_eng(letter):
    """Return the Latin spelling of one Polish letter.

    Looks ``letter`` up in ``polish_dict``; anything not listed there
    (punctuation, digits, placeholder characters, ...) is returned unchanged.
    """
    return polish_dict.get(letter, letter)
|
18 |
+
|
19 |
+
def cyrillic_to_eng(word):
    """Replace every key of ``cyrillic_equiv_dict`` found in ``word``.

    Keys are processed in the dict's iteration order and each occurrence is
    swapped for the mapped Latin spelling. The resulting string is returned;
    input without any of the keys comes back unchanged.
    """
    converted = word
    for placeholder, latin in cyrillic_equiv_dict.items():
        if placeholder not in converted:
            continue
        converted = converted.replace(placeholder, latin)
    return converted
|
24 |
+
|
25 |
+
def check_special_comb(word):
    """Replace the letter combinations listed in ``special_combs``.

    Keys are applied in the dict's insertion order, so a longer combination
    listed earlier (e.g. 'szcz') is consumed before the shorter ones it
    contains ('sz', 'cz', 'c'). Each match becomes the one-character
    placeholder stored as the value.

    NOTE(review): polish_sentence_to_latin called this name but no
    definition existed in this file, so every call raised NameError. This
    implementation follows the layout of ``special_combs``; confirm it
    matches the intended behaviour.
    """
    for combination, placeholder in special_combs.items():
        word = word.replace(combination, placeholder)
    return word


def polish_sentence_to_latin(word):
    """Transliterate Polish text into a plain Latin approximation.

    The conversion runs in three passes:

    1. ``check_special_comb`` swaps letter combinations for one-character
       placeholders so the per-letter pass cannot split them.
    2. ``polish_letter_to_eng`` maps each remaining character individually;
       characters it does not know pass through untouched.
    3. ``cyrillic_to_eng`` expands the placeholders into Latin spellings.

    Args:
        word: The text to convert. Despite the name it may be a whole
            sentence; characters without a mapping are kept as they are.

    Returns:
        The transliterated string.

    Raises:
        AssertionError: If ``word`` is not a string. Assertions are skipped
            when Python runs with ``-O``.
    """
    assert isinstance(word, str), "Input must be a string"
    with_placeholders = check_special_comb(word)
    per_letter = ''.join(
        polish_letter_to_eng(letter) for letter in with_placeholders
    )
    return cyrillic_to_eng(per_letter)
|