File size: 723 Bytes
937a9da
 
35b8bdf
937a9da
 
35b8bdf
937a9da
 
 
 
 
 
 
 
 
 
 
35b8bdf
 
 
937a9da
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
import re
from text.japanese import japanese_to_romaji_with_accent
from text.k2j import korean2katakana
from text.symbols import symbols


# Single-character pattern used to filter cleaned text: a regex character
# class assembled from the entries of ``symbols``.
# NOTE(review): the entries are joined with '^' and are not escaped. With two
# or more symbols the '^' ends up inside the class as a literal, and any
# regex-special entry ('-', ']', '\\') would be read as class syntax rather
# than as a literal character -- confirm this is intended before changing it.
_cleaner_cleans = re.compile('[{}]'.format('^'.join(symbols)))


def japanese_cleaners(text):
    """Convert *text* with ``japanese_to_romaji_with_accent`` and end it with '.'.

    The converted string gets a '.' inserted after its final character when
    that character is an ASCII letter (A-Z or a-z). Because ``$`` also
    matches just before a single trailing newline, a letter directly in front
    of such a newline receives the '.' as well.

    Args:
        text: Input string handed to ``japanese_to_romaji_with_accent``.

    Returns:
        The converted string, with the trailing '.' added where applicable.
    """
    romaji = japanese_to_romaji_with_accent(text)
    return re.sub(r'([A-Za-z])$', r'\1.', romaji)


def japanese_cleaners2(text):
    """Clean *text*, handling ``[KO]...[KO]`` spans, and filter the result.

    Steps, in order:
      1. Run ``japanese_cleaners`` on the whole input, then replace every
         'ts' with 'ʦ' and every '...' with '…'.
      2. Replace each ``[KO]...[KO]`` span with ``korean2katakana`` of its
         contents followed by '.'.
      3. Wrap the result in ``[JA]`` tags and replace each ``[JA]...[JA]``
         span with ``japanese_cleaners`` of its contents followed by ' '.
      4. Keep only the characters matched by ``_cleaner_cleans`` and remove
         all spaces.

    NOTE(review): step 1 runs before the ``[KO]`` spans are extracted, so the
    tags and their contents pass through ``japanese_cleaners`` first, and the
    'ts'/'...' substitutions are not re-applied after step 3 -- confirm this
    ordering is intended.

    Args:
        text: Input string, optionally containing ``[KO]...[KO]`` spans.

    Returns:
        The cleaned string with spaces removed.
    """
    def _korean_to_kana(match):
        # Contents of a [KO]...[KO] span, converted and terminated with '.'.
        return korean2katakana(match.group(1)) + '.'

    def _reclean(match):
        # Contents of a [JA]...[JA] span, cleaned again and followed by ' '.
        return japanese_cleaners(match.group(1)) + ' '

    cleaned = japanese_cleaners(text)
    cleaned = cleaned.replace('ts', 'ʦ')
    cleaned = cleaned.replace('...', '…')

    with_kana = re.sub(r'\[KO\](.*?)\[KO\]', _korean_to_kana, cleaned)
    wrapped = '[JA]' + with_kana + '[JA]'
    recleaned = re.sub(r'\[JA\](.*?)\[JA\]', _reclean, wrapped)

    kept_chars = _cleaner_cleans.findall(recleaned)
    return ''.join(kept_chars).replace(' ', '')