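# NOTE: "Spaces: Sleeping" -- Hugging Face Spaces page-status residue picked up
# during extraction; it is not part of the module.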
import string

import torch
from OldHangeul import text_to_jamo
# Romanization of Hangul jamo: initial consonants (choseong).
# The keys are the conjoining jamo U+1100..U+1112 in code-point order.  They
# are generated from code points instead of being written as literals because
# the literals had been mis-encoded, which collapsed them into duplicate keys.
initials = {
    chr(0x1100 + offset): roman
    for offset, roman in enumerate((
        'K', 'KK', 'N', 'T', 'TT', 'R', 'M', 'P', 'PP', 'S', 'SS',
        'NG', 'C', 'CC', 'CH', 'KH', 'TH', 'PH', 'H',
    ))
}
# Romanization of Hangul jamo: medial vowels (jungseong).
# The keys are the conjoining jamo U+1161..U+1175 in code-point order,
# generated from code points because the original key literals had been
# mis-encoded.
medials = {
    chr(0x1161 + offset): roman
    for offset, roman in enumerate((
        'a', 'ae', 'ya', 'yae', 'eo', 'e', 'yeo', 'ye', 'o', 'wa',
        'wae', 'oe', 'yo', 'u', 'wo', 'we', 'wi', 'yu', 'eu', 'ui', 'i',
    ))
}
# Romanization of Hangul jamo: final consonants (jongseong).
# The empty string maps to itself and stays the first entry.  The remaining
# keys are the conjoining jamo U+11A8..U+11C2 in code-point order, generated
# from code points because the original key literals had been mis-encoded.
finals = {'': ''}
finals.update(
    (chr(0x11A8 + offset), roman)
    for offset, roman in enumerate((
        'k', 'kk', 'ks', 'n', 'nj', 'nh', 't', 'r', 'rk', 'rm',
        'rb', 'rs', 'rt', 'rp', 'rh', 'm', 'p', 'ps', 's', 'ss', 'ng',
        'c', 'ch', 'kh', 'th', 'ph', 'h',
    ))
)
# Build the reverse lookups (romanization -> jamo) from the forward tables.
rev_initials = {roman: jamo for jamo, roman in initials.items()}
rev_medials = {roman: jamo for jamo, roman in medials.items()}
rev_finals = {roman: jamo for jamo, roman in finals.items()}
def hangul_to_roman(hangul):
    """Romanize the precomposed Hangul syllables in *hangul*.

    Every character in the precomposed syllable range U+AC00..U+D7A3 is
    decomposed with ``text_to_jamo`` and replaced by the concatenated
    romanizations of its initial, medial and (if present) final jamo.
    All other characters are copied through unchanged.

    Args:
        hangul: The text to convert.

    Returns:
        The converted string.

    Raises:
        KeyError: If a decomposed jamo has no entry in ``initials``,
            ``medials`` or ``finals``.
    """
    result = []
    for char in hangul:
        # The bounds are written as escapes so they survive any re-encoding
        # of the source file; mis-encoded bounds silently skip real Hangul.
        if not ('\uac00' <= char <= '\ud7a3'):
            result.append(char)
            continue
        jamos = text_to_jamo(char, compatibility=False, spacing=False)
        # NOTE(review): the jamo are read from positions 0, 2 and 4, and a
        # final is taken only when the result has length 5 -- presumably
        # text_to_jamo interleaves a separator; confirm against OldHangeul.
        initial = initials[jamos[0]]
        medial = medials[jamos[2]]
        final = finals[jamos[4]] if len(jamos) == 5 else ''
        result.append(initial + medial + final)
    return ''.join(result)
# Alphabet of the one-hot encoding: the ASCII letters followed by a space and
# a few punctuation marks.  A character's position in this string is its
# one-hot index, and n_letters is the width of each one-hot vector.
all_letters = string.ascii_letters + " .,;'"
n_letters = len(all_letters)
def letterToIndex(letter):
    """Return the position of *letter* in ``all_letters`` (e.g. "a" -> 0).

    Returns -1 when *letter* does not occur in ``all_letters``, as
    ``str.find`` does.
    """
    return all_letters.find(letter)
def lineToTensor(line):
    """Turn a line (a name) into a ``<line_length x 1 x n_letters>`` tensor.

    The result is an array of one-hot letter vectors: row ``i`` has a single
    1 at the index ``letterToIndex`` returns for the ``i``-th character.

    NOTE(review): for a character missing from ``all_letters`` the index is
    -1, so the last column is set instead; callers should pass only
    characters from ``all_letters`` if that aliasing is not wanted.
    """
    tensor = torch.zeros(len(line), 1, n_letters)
    for position, letter in enumerate(line):
        tensor[position, 0, letterToIndex(letter)] = 1
    return tensor