|
import pyopenjtalk
|
|
import re
|
|
import sys
|
|
import os
|
|
|
|
|
|
# Temporarily point stdout/stderr at the null device, then put the streams
# back.  The previously active streams are saved and restored explicitly:
# resetting to sys.__stdout__/sys.__stderr__ would discard any redirection a
# host application had installed before this module was imported, and those
# attributes may not refer to usable streams in every environment.
# The null-device handle is closed by the ``with`` block instead of leaking.
_saved_stdout, _saved_stderr = sys.stdout, sys.stderr
with open(os.devnull, 'w') as _devnull:
    try:
        sys.stdout = sys.stderr = _devnull
        # Nothing else currently runs while output is muted; any noisy
        # initialisation belongs here, inside the ``try``.
    finally:
        sys.stdout, sys.stderr = _saved_stdout, _saved_stderr
|
|
|
|
|
|
# Translation table for ``str.translate``: maps every hiragana code point from
# "ぁ" (U+3041) through "ゖ" (U+3096) to the katakana character located 0x60
# code points higher ("ァ" .. "ヶ").  This is the same ordinal -> ordinal
# mapping that spelling both alphabets out for ``str.maketrans`` produces.
hiragana_to_katakana = {
    code_point: code_point + 0x60
    for code_point in range(ord("ぁ"), ord("ゖ") + 1)
}
|
|
|
|
|
|
def hiragana_to_katakana_func(text):
    """Return ``text`` with hiragana replaced by the matching katakana.

    Args:
        text (str): Input string; characters that are not keys of the
            module-level ``hiragana_to_katakana`` table are left unchanged.

    Returns:
        str: The translated string.
    """
    converted = text.translate(hiragana_to_katakana)
    return converted
|
|
|
|
|
|
# One mora = a kana character (katakana ァ-ヴ, hiragana ぁ-ゖ, or the long-vowel
# mark ー) optionally followed by one small kana.  The small-kana set covers
# both scripts so that hiragana input ("きゃ") is grouped the same way as
# katakana input ("キャ").  Compiled once at import time instead of per call.
_MORA_PATTERN = re.compile(
    r"[ぁ-ゖァ-ヴー][ァィゥェォャュョぁぃぅぇぉゃゅょ]?"
)


def split_into_moras(kana):
    """Split a kana string into a list of mora-sized chunks.

    Args:
        kana (str): Katakana and/or hiragana text.

    Returns:
        List[str]: The chunks in order of appearance.  Characters that are
        not matched by the pattern (punctuation, latin letters, ...) are
        skipped, so an input without kana yields an empty list.
    """
    return _MORA_PATTERN.findall(kana)
|
|
|
|
|
|
def annotate_kana_with_accent(moras, acc):
    """Join moras into one string, inserting pitch arrows after some of them.

    Args:
        moras (List[str]): Mora strings in order.
        acc (int): Accent value; ``0`` and values above ``1`` put '↑' after
            the first mora, and a positive value puts '↓' after the
            ``acc``-th mora (for ``acc == 1`` that is the first mora, which
            then gets '↓' and no '↑').

    Returns:
        str: The concatenated moras with the arrows inserted.
    """
    pieces = []
    for position, mora in enumerate(moras, start=1):
        pieces.append(mora)
        if position == 1 and (acc == 0 or acc > 1):
            # Rise marker goes right after the first mora.
            pieces.append('↑')
        elif acc > 0 and position == acc:
            # Fall marker goes right after the mora the accent value points at.
            pieces.append('↓')
    return ''.join(pieces)
|
|
|
|
|
|
def get_katakana_with_accent(text):
    """Convert text to katakana with '↑'/'↓' accent arrows, token by token.

    Each token returned by ``pyopenjtalk.run_frontend`` is handled by its
    ``mora_size``:

    * more than one mora: the token's ``pron`` is passed through
      ``pyopenjtalk.g2p(..., kana=True)``, converted to katakana, split into
      moras and annotated with the token's ``acc`` value;
    * zero moras, or a ``pron`` that is exactly '’': the token's original
      ``string`` is copied through;
    * otherwise: the token's ``pron`` is copied through.

    Finally every '’' left in the result is replaced by '↑'.

    Args:
        text (str): Input text.

    Returns:
        str: The annotated string.
    """
    pieces = []
    for token in pyopenjtalk.run_frontend(text):
        mora_size = token['mora_size']
        if mora_size > 1:
            kana = pyopenjtalk.g2p(token['pron'], kana=True)
            kana = hiragana_to_katakana_func(kana)
            moras = split_into_moras(kana)
            pieces.append(annotate_kana_with_accent(moras, token['acc']))
        elif mora_size == 0 or token['pron'] == '’':
            pieces.append(token['string'])
        else:
            pieces.append(token['pron'])

    # str.replace returns a new string; the previous code discarded it, so the
    # substitution never took effect.  Keep the returned value.
    return ''.join(pieces).replace('’', '↑')
|
|
|
|
import pyopenjtalk
|
|
import re
|
|
def pyopenjtalk_g2p_prosody(text, drop_unvoiced_vowels=True):
    """Extract a phoneme + prosody symbol sequence from full-context labels.

    The algorithm is based on `Prosodic features control by symbols as input of
    sequence-to-sequence acoustic modeling for neural TTS`_ with some r9y9's tweaks.

    Symbols emitted besides plain phonemes: ``^`` for a leading ``sil``,
    ``$`` or ``?`` for a trailing ``sil`` (chosen from the label's ``!<n>_``
    field being 0 or 1), ``_`` for ``pau``, and ``#``, ``]``, ``[`` which are
    appended after a phoneme depending on the A:/F: numeric fields of the
    current and the next label (accent-phrase border, pitch fall and pitch
    rise in the referenced scheme).

    Args:
        text (str): Input text.
        drop_unvoiced_vowels (bool): whether to drop unvoiced vowels; when
            true, upper-case vowel labels (A/E/I/O/U) are lower-cased.

    Returns:
        List[str]: List of phoneme + prosody symbols.

    Examples:
        >>> pyopenjtalk_g2p_prosody("こんにちは。")
        ['^', 'k', 'o', '[', 'N', 'n', 'i', 'ch', 'i', 'w', 'a', '$']

    .. _`Prosodic features control by symbols as input of sequence-to-sequence acoustic
        modeling for neural TTS`: https://doi.org/10.1587/transinf.2020EDP7104

    """
    labels = pyopenjtalk.make_label(pyopenjtalk.run_frontend(text))
    final = len(labels) - 1

    symbols = []
    for idx, label in enumerate(labels):
        # Current phoneme: the text between the first '-' and the next '+'.
        phoneme = re.search(r"\-(.*?)\+", label).group(1)
        if drop_unvoiced_vowels and phoneme in "AEIOU":
            phoneme = phoneme.lower()

        if phoneme == "sil":
            # Silence is only expected as the very first or very last label.
            assert idx == 0 or idx == final
            if idx == 0:
                symbols.append("^")
            elif idx == final:
                e3 = _numeric_feature_by_regex(r"!(\d+)_", label)
                if e3 == 0:
                    symbols.append("$")
                elif e3 == 1:
                    symbols.append("?")
            continue

        if phoneme == "pau":
            symbols.append("_")
            continue

        symbols.append(phoneme)

        # Numeric fields of the current label.
        a1 = _numeric_feature_by_regex(r"/A:([0-9\-]+)\+", label)
        a2 = _numeric_feature_by_regex(r"\+(\d+)\+", label)
        a3 = _numeric_feature_by_regex(r"\+(\d+)/", label)
        f1 = _numeric_feature_by_regex(r"/F:(\d+)_", label)

        # Same a2 field, read from the following label.
        a2_next = _numeric_feature_by_regex(r"\+(\d+)\+", labels[idx + 1])

        if a3 == 1 and a2_next == 1 and phoneme in "aeiouAEIOUNcl":
            symbols.append("#")
        elif a1 == 0 and a2_next == a2 + 1 and a2 != f1:
            symbols.append("]")
        elif a2 == 1 and a2_next == 2:
            symbols.append("[")

    return symbols
|
|
|
|
def _numeric_feature_by_regex(regex, s):
|
|
match = re.search(regex, s)
|
|
if match is None:
|
|
return -50
|
|
return int(match.group(1))
|
|
import pyopenjtalk
|
|
def build_phone_to_katakana():
    """Build the katakana <-> phoneme-string lookup tables.

    Every entry of the built-in katakana list is passed to
    ``pyopenjtalk.g2p`` and the returned phoneme string (surrounding
    whitespace removed) is stored under that kana.  The reverse table is then
    derived from the forward one.

    Returns:
        Tuple[dict, dict]: ``(phone_to_katakana, katakana_to_phone)``.
    """
    basic_katakana = [
        'ア', 'イ', 'ウ', 'エ', 'オ',
        'カ', 'キ', 'ク', 'ケ', 'コ',
        'サ', 'シ', 'ス', 'セ', 'ソ',
        'タ', 'チ', 'ツ', 'テ', 'ト',
        'ナ', 'ニ', 'ヌ', 'ネ', 'ノ',
        'ハ', 'ヒ', 'フ', 'ヘ', 'ホ',
        'マ', 'ミ', 'ム', 'メ', 'モ',
        'ヤ', 'ユ', 'ヨ',
        'ラ', 'リ', 'ル', 'レ', 'ロ',
        'ワ', 'ヲ', 'ン',
        'ガ', 'ギ', 'グ', 'ゲ', 'ゴ',
        'ザ', 'ジ', 'ズ', 'ゼ', 'ゾ',
        'ダ', 'ヂ', 'ヅ', 'デ', 'ド',
        'バ', 'ビ', 'ブ', 'ベ', 'ボ',
        'パ', 'ピ', 'プ', 'ペ', 'ポ',
        'キャ', 'キュ', 'キョ',
        'シャ', 'シュ', 'ショ',
        'チャ', 'チュ', 'チョ',
        'ニャ', 'ニュ', 'ニョ',
        'ヒャ', 'ヒュ', 'ヒョ',
        'ミャ', 'ミュ', 'ミョ',
        'リャ', 'リュ', 'リョ',
        'ギャ', 'ギュ', 'ギョ',
        'ジャ', 'ジュ', 'ジョ',
        'ビャ', 'ビュ', 'ビョ',
        'ピャ', 'ピュ', 'ピョ',
        'ヴァ', 'ヴィ', 'ヴ', 'ヴェ', 'ヴォ',
        'ファ', 'フィ', 'フェ', 'フォ',
        'ウィ', 'ウェ', 'ウォ',
        'ティ', 'トゥ',
        'ディ', 'ドゥ',
        'ツァ', 'ツィ', 'ツェ', 'ツォ',
        'デュ', 'デョ',
        'ジェ', 'ジョ',
        'チェ', 'チョ',
        'シェ', 'ショ',
        'ヂェ', 'ヂョ',
        'ヒェ', 'ヒョ',
        'ビェ', 'ビョ',
        'ピェ', 'ピョ',
        'キェ', 'キョ',
        'ギェ', 'ギョ',
        'ミェ', 'ミョ',
        'リェ', 'リョ',
        'アァ', 'イィ', 'ウゥ', 'エェ', 'オォ',
        'ヴャ', 'ヴュ', 'ヴョ',
        'ッ', 'ー',
    ]

    katakana_to_phone = {}
    for kana in basic_katakana:
        # The list repeats some entries (e.g. 'ジョ'); converting them again
        # would only recompute the value already stored.
        if kana in katakana_to_phone:
            continue
        # strip() with no argument trims surrounding whitespace; the previous
        # strip('') passed an empty character set and therefore did nothing.
        katakana_to_phone[kana] = pyopenjtalk.g2p(kana).strip()

    # Reverse table.  When several kana share one phoneme string, the kana
    # that appears last in the list wins (later assignments overwrite).
    # NOTE(review): that may prefer rarer spellings over common ones (e.g.
    # presumably 'ヂ' over 'ジ') -- confirm whether first-wins was intended
    # before changing, since downstream consumers may rely on current output.
    phone_to_katakana = {
        phones: kana for kana, phones in katakana_to_phone.items()
    }
    return phone_to_katakana, katakana_to_phone
|
|
|
|
def phones_list_to_katakana(phone_list, phone_to_katakana):
    """Render a phoneme/prosody symbol list as a katakana string.

    Prosody symbols are copied through unchanged.  At every other position
    the longest run of up to five consecutive items whose space-joined form
    is a key of ``phone_to_katakana`` is replaced by the mapped kana.  An
    item that starts no known run is reported with ``print`` and skipped.

    Args:
        phone_list (List[str]): Phonemes and prosody symbols.
        phone_to_katakana (dict): Maps space-joined phoneme strings to kana.

    Returns:
        str: The katakana string with "[" shown as "↑" and "]" as "↓", or
        "…" when nothing was produced.
    """
    special_symbols = {'^', '_', '[', ']', '#', '$', '?', '↑', '↓'}
    longest_run = 5
    total = len(phone_list)

    pieces = []
    pos = 0
    while pos < total:
        current = phone_list[pos]
        if current in special_symbols:
            pieces.append(current)
            pos += 1
            continue

        # Try the longest window that still fits first, then shrink it.
        for width in range(min(longest_run, total - pos), 0, -1):
            key = ' '.join(phone_list[pos:pos + width])
            if key in phone_to_katakana:
                pieces.append(phone_to_katakana[key])
                pos += width
                break
        else:
            print(f"无法映射的音素: {current}")
            pos += 1

    rendered = ''.join(pieces)
    if not rendered:
        return "…"
    return rendered.replace("[", "↑").replace("]", "↓")
|
|
def katakana_to_phones_list(katakana_list, katakana_to_phone):
    """Translate a sequence of katakana characters into phoneme strings.

    Prosody symbols are copied through unchanged.  At every other position
    the longest run of up to five consecutive items whose concatenation is a
    key of ``katakana_to_phone`` is replaced by the mapped phoneme string.
    An item that starts no known run is reported with ``print`` and skipped.

    Args:
        katakana_list (List[str]): Katakana characters and prosody symbols.
        katakana_to_phone (dict): Maps kana strings to phoneme strings.

    Returns:
        List[str]: Phoneme strings and prosody symbols in input order, or
        ``["…"]`` when nothing was produced.
    """
    special_symbols = {'^', '_', '[', ']', '#', '$', '?', '↑', '↓'}
    longest_run = 5
    total = len(katakana_list)

    phones = []
    pos = 0
    while pos < total:
        current = katakana_list[pos]
        if current in special_symbols:
            phones.append(current)
            pos += 1
            continue

        # Try the longest window that still fits first, then shrink it.
        for width in range(min(longest_run, total - pos), 0, -1):
            key = ''.join(katakana_list[pos:pos + width])
            if key in katakana_to_phone:
                phones.append(katakana_to_phone[key])
                pos += width
                break
        else:
            print(f"无法映射的片假名: {current}")
            pos += 1

    return phones if phones else ["…"]
|
|
|
|
# Build both lookup tables once, at import time.  build_phone_to_katakana()
# calls pyopenjtalk.g2p for the entries of its katakana list, so importing
# this module requires a working pyopenjtalk.  The resulting dicts are used
# by surface_to_katakana_with_accent() and as the default argument of
# katakana_to_phones().
phone_to_katakana, katakana_to_phone = build_phone_to_katakana()
|
|
|
|
def surface_to_katakana_with_accent(text):
    """Convert surface text into katakana annotated with prosody symbols.

    Every "…" is removed from the input, the remainder is turned into a
    phoneme/prosody sequence by ``pyopenjtalk_g2p_prosody`` and that sequence
    is rendered with the module-level ``phone_to_katakana`` table.

    Args:
        text (str): Input text.

    Returns:
        str: The value returned by ``phones_list_to_katakana``.
    """
    without_ellipsis = text.replace("…", "")
    prosody_symbols = pyopenjtalk_g2p_prosody(without_ellipsis)
    return phones_list_to_katakana(prosody_symbols, phone_to_katakana)
|
|
|
|
def katakana_to_phones(katakana, katakana_to_phone=katakana_to_phone):
    """Convert an annotated katakana string into a space-separated phoneme string.

    The string is processed character by character with
    ``katakana_to_phones_list``; the markers "^", "#" and "$" are then
    removed from the joined result.

    Args:
        katakana (str): Katakana text, optionally containing prosody symbols.
        katakana_to_phone (dict): Kana -> phoneme-string table; defaults to
            the module-level table.

    Returns:
        str: Phonemes and remaining symbols separated by single spaces, with
        no leading or trailing whitespace.
    """
    phone_list = katakana_to_phones_list(list(katakana), katakana_to_phone)

    joined = ' '.join(phone_list)
    for marker in ("^", "#", "$"):
        joined = joined.replace(marker, "")

    # Removing a marker leaves the separators on both sides of it behind.
    # The previous replace(" ", " ") could not collapse them; splitting on
    # whitespace and re-joining normalises every gap to one space and trims
    # the ends.
    return ' '.join(joined.split())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def preprocess_text(text):
    """Preprocess text by delegating to ``surface_to_katakana_with_accent``.

    Args:
        text (str): Input text.

    Returns:
        str: Whatever ``surface_to_katakana_with_accent`` returns for ``text``.
    """
    katakana = surface_to_katakana_with_accent(text)
    return katakana
|
|
|
|
if __name__ == "__main__":
    # Demo: convert an annotated katakana sample back into a phoneme string
    # and print it.
    sample = "^キョ↓オワ#ワ↑タシノ#マ↑ホオ#エ↑ネル↓キイノ#ホ↑キュウノ#タ↑メ↓ギ$"
    print(katakana_to_phones(sample))