txya900619
committed on
Commit
·
5e8e534
1
Parent(s):
4aaa5e4
feat: init upload
Browse files- .gitignore +3 -0
- app.py +126 -0
- configs/ipa.yaml +8 -0
- configs/models.yaml +10 -0
- ipa/__init__.py +24 -0
- ipa/convert_digits.py +180 -0
- ipa/ipa.py +88 -0
- ipa/proc_text.py +85 -0
- replace/tts.py +70 -0
- requirements.txt +4 -0
.gitignore
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
__pycache__/
|
2 |
+
temp_config.json
|
3 |
+
flagged/
|
app.py
ADDED
@@ -0,0 +1,126 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
import os
|
3 |
+
import tempfile
|
4 |
+
|
5 |
+
import gradio as gr
|
6 |
+
import TTS
|
7 |
+
from TTS.utils.synthesizer import Synthesizer
|
8 |
+
import numpy as np
|
9 |
+
from huggingface_hub import snapshot_download
|
10 |
+
from omegaconf import OmegaConf
|
11 |
+
|
12 |
+
from ipa.ipa import get_ipa, parse_ipa
|
13 |
+
from replace.tts import ChangedVitsConfig
|
14 |
+
|
15 |
+
TTS.tts.configs.vits_config.VitsConfig = ChangedVitsConfig
|
16 |
+
|
17 |
+
def load_model(model_id):
    """Download a TTS model snapshot and build a synthesizer from it.

    The ``config.json`` of the snapshot refers to ``speakers.pth`` and
    ``language_ids.json`` by bare file name.  Those names are rewritten to
    paths inside the downloaded snapshot directory and the patched config is
    written to ``temp_config.json`` in the current working directory, which
    is then handed to ``Synthesizer``.

    Args:
        model_id: Repository id passed to ``snapshot_download``.

    Returns:
        A ``Synthesizer`` built from the snapshot's ``model.pth`` and the
        patched config file.
    """
    model_dir = snapshot_download(model_id)
    config_file_path = os.path.join(model_dir, "config.json")
    model_ckpt_path = os.path.join(model_dir, "model.pth")
    speaker_file_path = os.path.join(model_dir, "speakers.pth")
    language_file_path = os.path.join(model_dir, "language_ids.json")

    def as_json_fragment(path):
        # The path is spliced into raw JSON text, so escape it the way JSON
        # requires (backslashes, quotes) and drop the surrounding quotes.
        return json.dumps(path)[1:-1]

    temp_config_path = "temp_config.json"
    # Read and write as UTF-8 explicitly instead of relying on the platform
    # default encoding.
    with open(config_file_path, "r", encoding="utf-8") as f:
        content = f.read()
    content = content.replace("speakers.pth", as_json_fragment(speaker_file_path))
    content = content.replace("language_ids.json", as_json_fragment(language_file_path))
    with open(temp_config_path, "w", encoding="utf-8") as f:
        f.write(content)
    return Synthesizer(tts_checkpoint=model_ckpt_path, tts_config_path=temp_config_path)
|
34 |
+
|
35 |
+
# Expose load_model as an OmegaConf resolver so that `${load_model:<repo id>}`
# entries in the YAML config are replaced by the value load_model returns.
OmegaConf.register_new_resolver("load_model", load_model)

# Load the model table and convert it to plain Python containers; this
# resolves the interpolations in configs/models.yaml at import time.
models_config = OmegaConf.to_object(OmegaConf.load("configs/models.yaml"))
|
38 |
+
|
39 |
+
def text_to_speech(model_id: str, speaker: str, dialect, text: str):
    """Synthesize speech for ``text`` with the selected model, speaker and dialect.

    Args:
        model_id: Key into ``models_config`` selecting the synthesizer.
        speaker: Speaker id forwarded to the model as ``speaker_name``.
        dialect: Dialect name; passed to ``get_ipa`` and forwarded to the
            model as ``language_name``.
        text: Sentence to synthesize.

    Returns:
        A tuple ``(words, pinyin, (sample_rate, samples))`` where ``samples``
        is a NumPy array built from the model output.

    Raises:
        gr.Error: If ``text`` is empty or blank, if some words cannot be
            converted to IPA, or if nothing is left to pronounce after text
            processing.
    """
    model = models_config[model_id]["model"]
    # A blank string would pass a plain length check and fail further down.
    if not text or not text.strip():
        raise gr.Error("請勿輸入空字串。")
    words, ipa, pinyin, missing_words = get_ipa(text, dialect=dialect)
    if len(missing_words) > 0:
        raise gr.Error(
            f"句子中的[{','.join(missing_words)}]目前無法轉成 ipa。請嘗試其他句子。"
        )
    # get_ipa returns an empty list (not a string) when no token survives,
    # e.g. for delimiter-only input; parse_ipa needs a non-empty string.
    if not ipa:
        raise gr.Error("請勿輸入空字串。")

    wav = model.tts(
        parse_ipa(ipa),
        speaker_name=speaker,
        language_name=dialect,
        split_sentences=False,
    )

    # NOTE(review): the sample rate is hard-coded to 16000 — confirm that it
    # matches the output rate of every configured model.
    return words, pinyin, (16000, np.array(wav))
|
57 |
+
|
58 |
+
def when_model_selected(model_id):
    """Build the dropdown updates for a newly selected model.

    Args:
        model_id: Key into ``models_config``.

    Returns:
        Two ``gr.update`` objects: the first carries the speaker choices as
        ``(key, value)`` pairs of the model's ``speaker_mapping``, the second
        carries the model's dialect list.
    """
    selected = models_config[model_id]
    speakers = list(selected["speaker_mapping"].items())
    dialects = selected["avalible_dialect"]
    speaker_update = gr.update(choices=speakers)
    dialect_update = gr.update(choices=dialects)
    return speaker_update, dialect_update
|
63 |
+
|
64 |
+
|
65 |
+
# Top-level Gradio container: page title, a stylesheet imported by URL and a
# default theme whose font stack starts with "tauhu-oo".
demo = gr.Blocks(
    title="臺灣客語語音生成系統",
    css="@import url(https://tauhu.tw/tauhu-oo.css);",
    theme=gr.themes.Default(
        font=(
            "tauhu-oo",
            gr.themes.GoogleFont("Source Sans Pro"),
            "ui-sans-serif",
            "system-ui",
            "sans-serif",
        )
    ),
)

with demo:

    # The first model listed in the config is preselected.
    default_model_id = list(models_config.keys())[0]
    model_drop_down = gr.Dropdown(
        models_config.keys(),
        value=default_model_id,
    )
    # Speaker choices are the (key, value) pairs of the default model's
    # speaker_mapping; the initial value is the first mapping value.
    speaker_drop_down = gr.Dropdown(
        choices=[(k,v) for k, v in models_config[default_model_id]["speaker_mapping"].items()],
        value=list(models_config[default_model_id]["speaker_mapping"].values())[0]
    )
    # Dialect choices come from the default model; the first one is preselected.
    dialect_drop_down = gr.Dropdown(
        choices=models_config[default_model_id]["avalible_dialect"],
        value=models_config[default_model_id]["avalible_dialect"][0]
    )

    # Refresh the speaker and dialect choices when the model dropdown
    # receives input.
    model_drop_down.input(
        when_model_selected,
        inputs=[model_drop_down],
        outputs=[speaker_drop_down, dialect_drop_down]
    )



    gr.Markdown(
        """
# 臺灣客語語音生成系統
"""
    )
    # The three dropdowns above plus a free-text box feed text_to_speech; its
    # three return values fill the two text boxes and the audio component.
    gr.Interface(
        text_to_speech,
        inputs=[
            model_drop_down,
            speaker_drop_down,
            dialect_drop_down,
            gr.Textbox(),
        ],
        outputs=[
            gr.Textbox(interactive=False, label="word segment"),
            gr.Textbox(interactive=False, label="pinyin"),
            gr.Audio(
                interactive=False, label="generated speech", show_download_button=True
            ),
        ],
        allow_flagging="auto",
    )

# Runs at import time: there is no `if __name__ == "__main__"` guard.
demo.launch()
|
configs/ipa.yaml
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
gh_token: ${oc.env:GH_TOKEN}
|
2 |
+
delimiter_list: ${gh_download:FormoSpeech/FormoLexicon, release/delimiters.json, ${gh_token}}
|
3 |
+
replace_dict: ${gh_download:FormoSpeech/FormoLexicon, release/replaced_words_htia.json, ${gh_token}}
|
4 |
+
v2f_dict: ${gh_download:FormoSpeech/FormoLexicon, [release/v2f_goyu.json, release/v2f_htia.json], ${gh_token}}
|
5 |
+
preserved_list: ${gh_download:FormoSpeech/FormoLexicon, release/preserved_words_htia.json, ${gh_token}}
|
6 |
+
lexicon:
|
7 |
+
sixian: ${gh_download:FormoSpeech/FormoLexicon, release/lexicon_htia_sixian_c.json, ${gh_token}}
|
8 |
+
hailu: ${gh_download:FormoSpeech/FormoLexicon, release/lexicon_htia_hailu_c.json, ${gh_token}}
|
configs/models.yaml
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
sixian-hailu-mix:
|
2 |
+
model: ${load_model:formospeech/taiwanese-hakka-tts-sixian-hailu-mix}
|
3 |
+
avalible_dialect:
|
4 |
+
- sixian
|
5 |
+
- hailu
|
6 |
+
speaker_mapping: # display_name: id
|
7 |
+
sixian/female: XF
|
8 |
+
sixian/male: XM
|
9 |
+
hailu/female: HF
|
10 |
+
hailu/male: HM
|
ipa/__init__.py
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import requests
|
2 |
+
|
3 |
+
from omegaconf import OmegaConf
|
4 |
+
|
5 |
+
def gh_download(repo, path, token, timeout=30):
    """Fetch one or more JSON files from a GitHub repository and merge them.

    Args:
        repo: Repository in ``owner/name`` form.
        path: A single file path (``str``) or an iterable of file paths
            inside the repository.
        token: Token sent in the ``Authorization: Bearer`` header.
        timeout: Seconds to wait for each HTTP request.

    Returns:
        The decoded JSON of the first file.  If it is a list, the content of
        the following files is appended with ``extend``; if it is a dict, the
        following files are merged in with ``update``.  ``None`` when no path
        was given.

    Raises:
        RuntimeError: If any request does not answer with HTTP 200.
    """
    paths = [path] if isinstance(path, str) else path
    result = None
    headers = {"Authorization": f"Bearer {token}", "Accept": "application/vnd.github.raw+json"}
    # A separate loop name keeps the `path` argument intact.
    for file_path in paths:
        url = f"https://api.github.com/repos/{repo}/contents/{file_path}"
        # Without a timeout a stalled connection would block start-up forever.
        response = requests.get(url, headers=headers, timeout=timeout)
        if response.status_code != 200:
            raise RuntimeError(f"Failed to download {file_path} from {repo}")

        payload = response.json()
        if result is None:
            result = payload
        elif isinstance(result, list):
            result.extend(payload)
        elif isinstance(result, dict):
            result.update(payload)

    return result
|
23 |
+
|
24 |
+
OmegaConf.register_new_resolver("gh_download", gh_download)
|
ipa/convert_digits.py
ADDED
@@ -0,0 +1,180 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2024 Hung-Shin Lee (hungshinlee@gmail.com)
|
2 |
+
# Apache 2.0
|
3 |
+
|
4 |
+
import itertools
|
5 |
+
import re
|
6 |
+
|
7 |
+
# Chinese numerals indexed by digit value (index 0 is 零, index 9 is 九).
c_basic = "零一二三四五六七八九"
# Maps each digit character "0".."9" to its numeral; "." maps to 點 so that
# decimal strings can be read out character by character.
d2c = {str(d): c for d, c in enumerate(c_basic)}
d2c["."] = "點"
|
10 |
+
|
11 |
+
|
12 |
+
def num4year(matched):
    """Read every captured group of a regex match digit by digit.

    Each captured group is replaced, inside the full matched text, by the
    numerals from ``c_basic`` that correspond to its digits.

    Args:
        matched: Regex match whose groups contain digits only.

    Returns:
        The matched text with the captured digit runs spelled out.
    """
    converted = matched.group(0)
    for year in matched.groups():
        spoken = "".join(c_basic[int(digit)] for digit in year)
        converted = converted.replace(year, spoken)
    return converted
|
20 |
+
|
21 |
+
|
22 |
+
def num2chines_simple(matched):
    """Translate a digit string character by character through ``d2c``.

    Args:
        matched: String whose characters are all keys of ``d2c``.

    Returns:
        The concatenation of the mapped characters.
    """
    return "".join(d2c[ch] for ch in matched)
|
24 |
+
|
25 |
+
|
26 |
+
def num4percent(matched):
    """Convert a matched percentage into "百分之" followed by the number.

    Args:
        matched: Regex match whose first group is a number followed by one
            trailing character (the percent sign in ``parse_num``).

    Returns:
        "百分之" plus ``num2chinese`` of the group without its last character.
    """
    percent_text = matched.group(1)
    number_part = percent_text[:-1]
    return "百分之" + num2chinese(number_part)
|
29 |
+
|
30 |
+
|
31 |
+
def num4cellphone(matched):
    """Read a matched phone number digit by digit.

    Args:
        matched: Regex match whose first group holds digits, optionally
            separated by whitespace and "-".

    Returns:
        The digits spelled out with the numerals in ``c_basic``.
    """
    # The caller's pattern allows any whitespace (`\s`) around the dash, so
    # drop all whitespace, not only " ", before converting with int().
    digits = "".join(matched.group(1).split()).replace("-", "")
    return "".join(c_basic[int(d)] for d in digits)
|
35 |
+
|
36 |
+
|
37 |
+
def num4er(matched):  # 2 to 二
    """Return the first captured group with every "2" replaced by "二"."""
    return matched.group(1).replace("2", "二")
|
40 |
+
|
41 |
+
|
42 |
+
def num4liang(matched):  # 2 to 兩
    """Return the first captured group with every "2" replaced by "兩"."""
    return matched.group(1).replace("2", "兩")
|
45 |
+
|
46 |
+
|
47 |
+
def num4general(matched):
    """Convert a matched number, optionally preceded by one other character.

    Args:
        matched: Regex match whose first group is either a number or one
            non-digit character followed by a number.

    Returns:
        The converted text.  A leading non-digit character is kept as is.
        After a letter or dash, a number shorter than three characters is
        read as a quantity (e.g. MP3, F-16) and a longer one digit by digit
        (e.g. AM104); everywhere else the number is read as a quantity.
    """
    num = matched.group(1)
    head, tail = num[0], num[1:]

    if re.match("[A-Za-z-─]", head):
        if len(tail) < 3:
            # MP3 or F-16
            return head + num2chinese(tail)
        # AM104
        return head + num2chines_simple(tail)

    if re.match("[0-9]", head):
        # No prefix character: the whole group is the number.
        return num2chinese(num)
    return head + num2chinese(tail)
|
62 |
+
|
63 |
+
|
64 |
+
def parse_num(text: str) -> str:
    """Replace the Arabic numbers in ``text`` with Chinese numerals.

    The rules run in a fixed order, each on the output of the previous one:
    years, percentages, phone numbers, a lone "2" read as 二, a lone "2"
    read as 兩, and finally every remaining number.

    Args:
        text: Input sentence.

    Returns:
        The sentence with its numbers spelled out.
    """
    # year
    text = re.sub("([0-9]{4})[到至]([0-9]{4})年", num4year, text)
    text = re.sub("([0-9]{4})年", num4year, text)

    # percentage; `[0-9]*` consumes every decimal digit ("12.55%"), where a
    # single optional digit left the rest to be matched as another number.
    text = re.sub(r"([0-9]+\.?[0-9]*%)", num4percent, text)

    # cellphone
    text = re.sub(r"([0-9]{4}\s?-\s?[0-9]{6})", num4cellphone, text)

    # single 2 to 二
    text = re.sub(r"([^\d]2[診樓月號])", num4er, text)
    text = re.sub(r"([初]2[^\d])", num4er, text)

    # single 2 to 兩; a "2" that belongs to a decimal ("2.5", "1.2") is left
    # for the general rule so the number is not torn apart.
    text = re.sub(r"([^\d.]2(?!\.\d)[^\d])", num4liang, text)

    # general number, including all of its decimal digits
    text = re.sub(r"([^0-9]?[0-9]+\.?[0-9]*)", num4general, text)

    return text
|
86 |
+
|
87 |
+
|
88 |
+
def num2chinese(num, big=False, simp=False, o=False, twoalt=True) -> str:
    """
    Converts numbers to Chinese representations.
    https://gist.github.com/gumblex/0d65cad2ba607fd14de7
    `num`   : number or numeric string, optionally signed and with a decimal part.
    `big`   : use financial characters.
    `simp`  : use simplified characters instead of traditional characters.
    `o`     : use 〇 for zero.
    `twoalt`: use 两/兩 for two when appropriate.
    Note that `o` and `twoalt` is ignored when `big` is used,
    and `twoalt` is ignored when `o` is used for formal representations.
    Raises ValueError when the magnitude is 1e48 or more, when the value is
    written in scientific notation, or when it is not a valid number.
    """
    # check num first
    nd = str(num)
    if abs(float(nd)) >= 1e48:
        raise ValueError("number out of range")
    elif "e" in nd.lower():
        # Also reject an upper-case exponent ("1E5"), which float() accepts.
        raise ValueError("scientific notation is not supported")
    c_symbol = "正负点" if simp else "正負點"
    if o:  # formal
        twoalt = False
    if big:
        c_basic = "零壹贰叁肆伍陆柒捌玖" if simp else "零壹貳參肆伍陸柒捌玖"
        c_unit1 = "拾佰仟"
        c_twoalt = "贰" if simp else "貳"
    else:
        c_basic = "〇一二三四五六七八九" if o else "零一二三四五六七八九"
        c_unit1 = "十百千"
        if twoalt:
            c_twoalt = "两" if simp else "兩"
        else:
            c_twoalt = "二"
    c_unit2 = "万亿兆京垓秭穰沟涧正载" if simp else "萬億兆京垓秭穰溝澗正載"

    def revuniq(l):
        # Reverse the parts and collapse runs of identical neighbours
        # (repeated zeros) into one.
        return "".join(k for k, _ in itertools.groupby(reversed(l)))

    result = []
    if nd[0] == "+":
        result.append(c_symbol[0])
    elif nd[0] == "-":
        result.append(c_symbol[1])
    if "." in nd:
        integer, remainder = nd.lstrip("+-").split(".")
    else:
        integer, remainder = nd.lstrip("+-"), None
    if int(integer):
        # Groups of four digits, least significant group first.
        splitted = [integer[max(i - 4, 0) : i] for i in range(len(integer), 0, -4)]
        intresult = []
        for nu, unit in enumerate(splitted):
            # special cases
            if int(unit) == 0:  # 0000
                intresult.append(c_basic[0])
                continue
            elif nu > 0 and int(unit) == 2:  # 0002
                intresult.append(c_twoalt + c_unit2[nu - 1])
                continue
            ulist = []
            unit = unit.zfill(4)
            for nc, ch in enumerate(reversed(unit)):
                if ch == "0":
                    if ulist:  # ???0
                        ulist.append(c_basic[0])
                elif nc == 0:
                    ulist.append(c_basic[int(ch)])
                elif nc == 1 and ch == "1" and all(i == "0" for i in unit[: nc + 1]):
                    # special case for tens: a bare "十" is used only when the
                    # thousands and hundreds digits of the group are zero
                    # (十四), otherwise "一十" is kept (三千三百一十四)
                    ulist.append(c_unit1[0])
                elif nc > 1 and ch == "2":
                    ulist.append(c_twoalt + c_unit1[nc - 1])
                else:
                    ulist.append(c_basic[int(ch)] + c_unit1[nc - 1])
            ustr = revuniq(ulist)
            if nu == 0:
                intresult.append(ustr)
            else:
                intresult.append(ustr + c_unit2[nu - 1])
        result.append(revuniq(intresult).strip(c_basic[0]))
    else:
        result.append(c_basic[0])
    if remainder:
        # Decimal digits are read one by one after the point character.
        result.append(c_symbol[2])
        result.append("".join(c_basic[int(ch)] for ch in remainder))
    return "".join(result)
|
175 |
+
|
176 |
+
|
177 |
+
if __name__ == "__main__":
    # Manual smoke test: print a sample sentence containing a phone number
    # next to its converted form.
    text = "若手機仔幾多號?吾手機仔係0964-498042。"

    print(f"{text} -> {parse_num(text)}")
|
ipa/ipa.py
ADDED
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import re
|
3 |
+
from pathlib import Path
|
4 |
+
|
5 |
+
import jieba
|
6 |
+
from omegaconf import OmegaConf
|
7 |
+
|
8 |
+
from ipa.convert_digits import parse_num
|
9 |
+
from ipa.proc_text import (
|
10 |
+
apply_v2f,
|
11 |
+
normalize_text,
|
12 |
+
prep_regex,
|
13 |
+
run_jieba,
|
14 |
+
update_jieba_dict,
|
15 |
+
)
|
16 |
+
|
17 |
+
# Load configs/ipa.yaml into plain Python containers; converting with
# to_object resolves the interpolations in that file at import time.
ipa_configs = OmegaConf.to_object(OmegaConf.load("configs/ipa.yaml"))
# Entries listed in preserved_list are removed from the v2f mapping, so
# apply_v2f never substitutes them.
for key in ipa_configs["preserved_list"]:
    ipa_configs["v2f_dict"].pop(key, None)
# Regex sources shared by every get_ipa call.
delimiter_regex, replace_regex, v2f_regex = prep_regex(
    ipa_configs["delimiter_list"], ipa_configs["replace_dict"], ipa_configs["v2f_dict"]
)
|
23 |
+
|
24 |
+
def get_ipa(raw_text, dialect):
    """Segment ``raw_text`` and look every word up in the dialect lexicon.

    The text is normalized, its numbers are spelled out, it is split on the
    configured delimiters and re-joined with ",", segmented with jieba,
    passed through the v2f substitution and segmented once more.

    Args:
        raw_text: Input sentence.
        dialect: Key of the lexicon to use in ``ipa_configs["lexicon"]``.

    Returns:
        ``(words, ipa, pinyin, missing_words)``.  When every word is known
        and at least one IPA entry was found, the first three items are
        strings whose tokens are separated by single spaces, with "," clause
        separators written without surrounding spaces.  Otherwise they are
        returned as the raw lists.  ``missing_words`` is always a list.
    """
    lexicon = ipa_configs["lexicon"][dialect]
    jieba_dict_path = Path(os.path.dirname(jieba.__file__)) / "dict.txt"
    update_jieba_dict(list(lexicon.keys()), jieba_dict_path)

    normalized = normalize_text(raw_text, ipa_configs["replace_dict"], replace_regex)
    normalized = parse_num(normalized)
    clauses = []
    for piece in re.split(delimiter_regex, normalized):
        piece = piece.strip()
        if piece:
            clauses.append(piece)

    segmented = run_jieba(",".join(clauses))
    segmented = apply_v2f(segmented, ipa_configs["v2f_dict"], v2f_regex)
    segmented = run_jieba("".join(segmented))

    final_words, final_pinyin, final_ipa, missing_words = [], [], [], []
    for word in segmented:
        if not word.strip():
            continue
        final_words.append(word)
        if word == ",":
            final_pinyin.append(",")
            final_ipa.append(",")
        elif word in lexicon:
            entry = lexicon[word]
            final_pinyin.append(entry['pinyin'][0])
            # NOTE: only the first ipa of lexicon[word] is considered
            final_ipa.append(entry['ipa'][0].replace(" ", "-"))
        else:
            missing_words.append(word)

    if not final_ipa or missing_words:
        return final_words, final_ipa, final_pinyin, missing_words

    def joined(parts):
        return " ".join(parts).replace(" , ", ",")

    return joined(final_words), joined(final_ipa), joined(final_pinyin), missing_words
|
65 |
+
|
66 |
+
def parse_ipa(ipa: str):
    """Split an IPA string into the token list passed to the model.

    The string is cut at every boundary between a run of separator
    characters (",", " ", "-") and the text around it.

    Args:
        ipa: IPA string in which a syllable may carry a tone after "_".

    Returns:
        A list of tokens.  A " " piece is kept, a "," piece becomes the three
        tokens " ", ",", " ", and a "-" piece is dropped.  Any other piece
        contributes one token per character of the part before the first "_",
        followed by the tone as one token when the piece contains exactly one
        "_".
    """
    tokens = []
    pieces = re.split(r"(?<![, -])(?=[, -])|(?<=[, -])(?![, -])",ipa)
    for piece in pieces:
        if piece == "-":
            # "-" contributes no token; only " " separates words.
            continue
        if piece == " ":
            tokens.append(piece)
        elif piece == ",":
            tokens.extend(" , ")
        else:
            fields = piece.split("_")
            tokens.extend(fields[0])
            # tone as a separate token
            if len(fields) == 2:
                tokens.append(fields[1])
    return tokens
|
ipa/proc_text.py
ADDED
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2024 Hung-Shin Lee (hungshinlee@gmail.com)
|
2 |
+
# Apache 2.0
|
3 |
+
|
4 |
+
import re
|
5 |
+
from pathlib import Path
|
6 |
+
from typing import Tuple
|
7 |
+
from unicodedata import normalize
|
8 |
+
|
9 |
+
import jieba
|
10 |
+
import opencc
|
11 |
+
|
12 |
+
# 20 is logging.INFO.
jieba.setLogLevel(20)
# Replace jieba's default pattern for the runs of characters it segments.
# Written as a raw string so that `\.` and `\-` are not invalid string
# escapes; the `re` module itself interprets `\uXXXX` and `\UXXXXXXXX`.
jieba.re_han_default = re.compile(r"([\u2e80-\U000e01efa-zA-Z0-9+#&\._%\-']+)", re.U)

# OpenCC converter created with the "s2tw.json" configuration.
s2tw_converter = opencc.OpenCC("s2tw.json")
|
16 |
+
|
17 |
+
|
18 |
+
def update_jieba_dict(
    lexicon: list,
    jieba_dict_path: Path,
    high_freq_words: list = None,
    high_freq_words_weight: int = 10,
) -> list:
    """Rewrite the jieba dictionary file from ``lexicon`` and reset jieba.

    Args:
        lexicon: Words to write to the dictionary.
        jieba_dict_path: Dictionary file to overwrite.
        high_freq_words: Words whose frequency is multiplied by
            ``high_freq_words_weight``.  Defaults to none.
        high_freq_words_weight: Multiplier applied to high-frequency words.

    Returns:
        The sorted, de-duplicated lexicon that was written.  Every line of
        the file is ``<word> <frequency>`` where the frequency is the word
        length, multiplied by the weight for high-frequency words.
    """
    import tempfile

    boosted = frozenset(high_freq_words or ())
    lexicon = sorted(set(lexicon))

    jieba_dict_path.unlink(missing_ok=True)
    # Remove the cache jieba keeps in the system temp directory so that the
    # new dictionary is actually read on the next initialization.
    (Path(tempfile.gettempdir()) / "jieba.cache").unlink(missing_ok=True)

    # Write UTF-8 explicitly; the words are not ASCII and the platform
    # default encoding may differ.
    with jieba_dict_path.open("w", encoding="utf-8") as file:
        for word in lexicon:
            weight = len(word)
            if word in boosted:
                weight *= high_freq_words_weight
            file.write(f"{word} {weight}\n")

    # Force jieba to reload the dictionary the next time it is used.
    jieba.dt.initialized = False

    return lexicon
|
39 |
+
|
40 |
+
|
41 |
+
def run_jieba(line: str) -> list:
    """Segment ``line`` with jieba (``cut_all`` and ``HMM`` both off).

    Args:
        line: Text to segment.

    Returns:
        The segments as a list.
    """
    # NOTE: when jieba processes multi-line text, the result loses the
    # original line structure.
    return list(jieba.cut(line, cut_all=False, HMM=False))
|
47 |
+
|
48 |
+
|
49 |
+
def normalize_text(text: str, replace_dict: dict, replace_regex: str) -> str:
    """Clean up ``text`` and apply the configured replacements.

    Removes the characters U+0008, U+FEFF and U+0010, applies NFKC
    normalization, substitutes every match of ``replace_regex`` with its
    ``replace_dict`` entry, collapses whitespace runs into single spaces and
    upper-cases the result.

    Args:
        text: Input text.
        replace_dict: Maps each string matched by ``replace_regex`` to its
            replacement.
        replace_regex: Pattern matching the keys of ``replace_dict``; an
            empty string disables the substitution.

    Returns:
        The normalized text.
    """
    def replace_match(match):
        return replace_dict[match.group(0)]

    text = re.sub("\x08", "", text)
    text = re.sub("\ufeff", "", text)
    text = re.sub("\u0010", "", text)
    text = normalize("NFKC", text)
    # An empty pattern matches the empty string at every position and the
    # lookup of "" in replace_dict would raise KeyError.
    if replace_regex:
        text = re.sub(replace_regex, replace_match, text)
    text = " ".join(text.split()).upper()

    return text
|
61 |
+
|
62 |
+
|
63 |
+
def apply_v2f(word_list: list, v2f_dict: dict, v2f_regex: str) -> list:
    """Substitute the v2f mapping inside every word of ``word_list``.

    Args:
        word_list: Words to process.
        v2f_dict: Maps each string matched by ``v2f_regex`` to its
            replacement.
        v2f_regex: Pattern matching the keys of ``v2f_dict``; an empty string
            disables the substitution.

    Returns:
        A new list with the substitutions applied.
    """
    # An empty pattern matches the empty string at every position and the
    # lookup of "" in v2f_dict would raise KeyError.
    if not v2f_regex:
        return list(word_list)

    return [
        re.sub(v2f_regex, lambda x: v2f_dict[x.group(0)], word)
        for word in word_list
    ]
|
69 |
+
|
70 |
+
|
71 |
+
def prep_regex(
    delimiter_list: list, replace_dict: dict = None, v2f_dict: dict = None
) -> Tuple[str, str, str]:
    """Build the regex sources for delimiters, replacements and v2f keys.

    Args:
        delimiter_list: Literal delimiter strings.
        replace_dict: Mapping whose keys are to be matched literally.
        v2f_dict: Mapping whose keys are to be matched literally.

    Returns:
        ``(delimiter_regex, replace_regex, v2f_regex)``.  Each is an
        alternation of escaped literals; the last two are "" when their
        mapping is empty or missing.
    """
    def alternation(keys):
        # Longest keys first, so that a short key cannot pre-empt a longer
        # key that starts with it.
        ordered = sorted(keys, key=len, reverse=True)
        return "|".join(map(re.escape, ordered))

    delimiter_regex = "|".join(map(re.escape, delimiter_list))
    replace_regex = alternation(replace_dict.keys()) if replace_dict else ""
    v2f_regex = alternation(v2f_dict.keys()) if v2f_dict else ""

    return delimiter_regex, replace_regex, v2f_regex
|
replace/tts.py
ADDED
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from dataclasses import dataclass
|
2 |
+
from typing import Dict, List
|
3 |
+
from TTS.tts.configs.vits_config import VitsConfig
|
4 |
+
|
5 |
+
from coqpit import Coqpit
|
6 |
+
|
7 |
+
|
8 |
+
@dataclass
class CharactersConfig(Coqpit):
    """Defines arguments for the `BaseCharacters` or `BaseVocabulary` and their subclasses.

    Args:
        characters_class (str):
            Defines the class of the characters used. If None, we pick ```Phonemes``` or ```Graphemes``` based on
            the configuration. Defaults to None.

        vocab_dict (dict):
            Defines the vocabulary dictionary used to encode the characters. Defaults to None.

        pad (str):
            characters in place of empty padding. Defaults to None.

        eos (str):
            characters showing the end of a sentence. Defaults to None.

        bos (str):
            characters showing the beginning of a sentence. Defaults to None.

        blank (str):
            Optional character used between characters by some models for better prosody. Defaults to None.

        characters (List[str]):
            character set used by the model, declared here as a list of strings. Characters not in this list are
            ignored when converting input text to a list of sequence IDs. Defaults to None.

        punctuations (str):
            characters considered as punctuation as parsing the input sentence. Defaults to None.

        phonemes (str):
            characters considered as parsing phonemes. This is only for backwards compat. Use `characters` for new
            models. Defaults to None.

        is_unique (bool):
            remove any duplicate characters in the character lists. It is a bandaid for compatibility with the old
            models trained with character lists with duplicates. Defaults to True.

        is_sorted (bool):
            Sort the characters in alphabetical order. Defaults to True.
    """

    characters_class: str = None

    # using BaseVocabulary
    vocab_dict: Dict = None

    # using on BaseCharacters
    pad: str = None
    eos: str = None
    bos: str = None
    blank: str = None
    # Declared as a list of strings rather than a single string.
    characters: List[str] = None
    punctuations: str = None
    phonemes: str = None
    is_unique: bool = True  # for backwards compatibility of models trained with char sets with duplicates
    is_sorted: bool = True
|
66 |
+
|
67 |
+
|
68 |
+
@dataclass
class ChangedVitsConfig(VitsConfig):
    """``VitsConfig`` whose ``characters`` field uses this module's ``CharactersConfig``."""

    # Re-declared so the field is typed with the CharactersConfig defined in
    # this module, whose `characters` is a list of strings.
    characters: CharactersConfig = None
|
requirements.txt
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
jieba
|
2 |
+
opencc
|
3 |
+
TTS
|
4 |
+
omegaconf
|