StableTTS_en-demo / text /mandarin.py
KdaiP's picture
Upload 238 files
d358e26 verified
import re
from pypinyin import lazy_pinyin, Style
from .custom_pypinyin_dict import phrase_pinyin_data
phrase_pinyin_data.load()
import jieba
from .cn2an import an2cn
# ζ ‡η‚Ήη¬¦ε·ζ­£εˆ™
punc_map = {
":": ",",
"οΌ›": ",",
",": ",",
"。": ".",
"!": "!",
"?": "?",
"\n": ".",
"Β·": ",",
"、": ",",
"$": ".",
"β€œ": "'",
"”": "'",
'"': "'",
"β€˜": "'",
"’": "'",
"(": "'",
"οΌ‰": "'",
"(": "'",
")": "'",
"γ€Š": "'",
"》": "'",
"【": "'",
"】": "'",
"[": "'",
"]": "'",
"β€”": "-",
"~": "~",
"γ€Œ": "'",
"」": "'",
"γ€Ž": "'",
"』": "'",
}
punc_table = str.maketrans(punc_map)
# ζ•°ε­—ζ­£εˆ™εŒ–
number_pattern = re.compile(r'\d+(?:\.?\d+)?')
def replace_number(match):
return an2cn(match.group())
def normalize_number(text):
return number_pattern.sub(replace_number, text)
# get symbols of phones
def load_pinyin_symbols(path):
pinyin_dict={}
temp = []
with open(path, "r", encoding='utf-8') as f:
content = f.readlines()
for line in content:
cuts = line.strip().split(',')
pinyin = cuts[0]
phones = cuts[1].split(' ')
pinyin_dict[pinyin] = phones
temp.extend(phones)
temp = list(set(temp))
tone = []
for phone in temp:
for i in range(1, 6):
phone2 = phone + str(i)
tone.append(phone2)
print(sorted(tone, key=lambda x: len(x)))
return pinyin_dict
def load_pinyin_dict(path):
pinyin_dict = {}
with open(path, "r", encoding='utf-8') as f:
for line in f:
key, value = line.strip().split(',', 1)
pinyin_dict[key] = value.split()
return pinyin_dict
pinyin_dict = load_pinyin_dict('text/cnm3/ds_CNM3.txt')
def chinese_to_cnm3(text: str):
text = text.translate(punc_table)
text = normalize_number(text)
words = jieba.lcut(text, cut_all=False)
phones = []
for word in words:
pinyin_list = lazy_pinyin(word, style=Style.TONE3, neutral_tone_with_five=True)
for pinyin in pinyin_list:
if pinyin[-1].isdigit():
tone = pinyin[-1]
syllable = pinyin[:-1]
phone = pinyin_dict[syllable]
phones.extend([ph + tone for ph in phone])
elif pinyin[-1].isalpha():
pass
else:
phones.extend(pinyin)
return phones