File size: 4,897 Bytes
fd6a905
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
import re
from pathlib import Path

import jieba.posseg as psg
from pypinyin import Style, lazy_pinyin

from style_bert_vits2.nlp.chinese.tone_sandhi import ToneSandhi
from style_bert_vits2.nlp.symbols import PUNCTUATIONS


# Mapping from a pinyin syllable (e.g. "zhong") to its space-separated
# phoneme symbols, loaded from the bundled opencpop-strict lexicon
# (one tab-separated "pinyin<TAB>phonemes" entry per line).
__PINYIN_TO_SYMBOL_MAP: dict[str, str] = {}
with open(Path(__file__).parent / "opencpop-strict.txt", encoding="utf-8") as f:
    for entry in f:
        # Key: raw first field; value: second field of the stripped line.
        __PINYIN_TO_SYMBOL_MAP[entry.split("\t")[0]] = entry.strip().split("\t")[1]


def g2p(text: str) -> tuple[list[str], list[int], list[int]]:
    """Convert normalized Chinese text to phonemes, tones and word2ph.

    Args:
        text: Normalized Chinese text (punctuation limited to PUNCTUATIONS).

    Returns:
        A tuple ``(phones, tones, word2ph)``: the phoneme symbols padded
        with ``"_"`` at both ends, one tone id per phoneme (0 for padding
        and punctuation), and the phoneme count for each input character
        (padded with 1 at both ends).
    """
    # Split after each punctuation mark (lookbehind) and swallow any
    # whitespace that follows it.
    splitter = r"(?<=[{0}])\s*".format("".join(PUNCTUATIONS))
    sentences = [chunk for chunk in re.split(splitter, text) if chunk.strip() != ""]
    phones, tones, word2ph = __g2p(sentences)
    assert sum(word2ph) == len(phones)
    assert len(word2ph) == len(text)  # Sometimes it will crash,you can add a try-catch.
    # Pad both ends with a silence marker.
    return ["_"] + phones + ["_"], [0] + tones + [0], [1] + word2ph + [1]


def __g2p(segments: list[str]) -> tuple[list[str], list[int], list[int]]:
    """Run grapheme-to-phoneme conversion on punctuation-split segments.

    Args:
        segments: Sentence fragments, one per punctuation-terminated chunk.

    Returns:
        ``(phones_list, tones_list, word2ph)``: the flat phoneme symbols,
        the tone id for each phoneme (0 for punctuation), and the number
        of phonemes produced by each input character.
    """
    phones_list: list[str] = []
    tones_list: list[int] = []
    word2ph: list[int] = []
    tone_modifier = ToneSandhi()
    for seg in segments:
        # Replace all English words in the sentence; they are not handled here.
        seg = re.sub("[a-zA-Z]+", "", seg)
        seg_cut = psg.lcut(seg)
        initials = []
        finals = []
        seg_cut = tone_modifier.pre_merge_for_modify(seg_cut)  # type: ignore
        for word, pos in seg_cut:
            if pos == "eng":
                continue
            sub_initials, sub_finals = __get_initials_finals(word)
            # Apply tone-sandhi rules to the finals of this word.
            sub_finals = tone_modifier.modified_tone(word, pos, sub_finals)
            initials.append(sub_initials)
            finals.append(sub_finals)

            # assert len(sub_initials) == len(sub_finals) == len(word)

        # Flatten the per-word lists. A nested comprehension is linear,
        # unlike the quadratic sum(list_of_lists, []) used previously.
        initials = [ini for sub in initials for ini in sub]
        finals = [fin for sub in finals for fin in sub]

        for c, v in zip(initials, finals):
            raw_pinyin = c + v
            # NOTE: post process for pypinyin outputs
            # we discriminate i, ii and iii
            if c == v:
                # Initial equals final only for punctuation passed through
                # unchanged by pypinyin; emit it as its own phoneme.
                assert c in PUNCTUATIONS
                phone = [c]
                tone = "0"
                word2ph.append(1)
            else:
                # Last character of the final is the tone digit (1-5).
                v_without_tone = v[:-1]
                tone = v[-1]

                pinyin = c + v_without_tone
                assert tone in "12345"

                if c:
                    # Syllable with an initial consonant: expand the
                    # contracted finals to their full spellings.
                    v_rep_map = {
                        "uei": "ui",
                        "iou": "iu",
                        "uen": "un",
                    }
                    if v_without_tone in v_rep_map:
                        pinyin = c + v_rep_map[v_without_tone]
                else:
                    # Bare final (no initial): rewrite to the standalone
                    # syllable spelling used by the lexicon.
                    pinyin_rep_map = {
                        "ing": "ying",
                        "i": "yi",
                        "in": "yin",
                        "u": "wu",
                    }
                    if pinyin in pinyin_rep_map:
                        pinyin = pinyin_rep_map[pinyin]
                    else:
                        single_rep_map = {
                            "v": "yu",
                            "e": "e",
                            "i": "y",
                            "u": "w",
                        }
                        if pinyin[0] in single_rep_map:
                            pinyin = single_rep_map[pinyin[0]] + pinyin[1:]

                assert pinyin in __PINYIN_TO_SYMBOL_MAP, (
                    pinyin,
                    seg,
                    raw_pinyin,
                )
                phone = __PINYIN_TO_SYMBOL_MAP[pinyin].split(" ")
                word2ph.append(len(phone))

            phones_list += phone
            tones_list += [int(tone)] * len(phone)
    return phones_list, tones_list, word2ph


def __get_initials_finals(word: str) -> tuple[list[str], list[str]]:
    """Return the pinyin initials and finals for each character of *word*.

    Args:
        word: A Chinese word (one or more characters).

    Returns:
        ``(initials, finals)`` — parallel lists, one entry per character;
        finals carry the tone digit appended (FINALS_TONE3 style, neutral
        tone rendered as 5).
    """
    raw_initials = lazy_pinyin(word, neutral_tone_with_five=True, style=Style.INITIALS)
    raw_finals = lazy_pinyin(
        word, neutral_tone_with_five=True, style=Style.FINALS_TONE3
    )
    # Pair the two pypinyin outputs and unzip into separate lists.
    paired = list(zip(raw_initials, raw_finals))
    return [c for c, _ in paired], [v for _, v in paired]


if __name__ == "__main__":
    # Manual smoke test: normalize a Chinese sentence, run G2P on it,
    # then extract BERT features aligned via word2ph.
    from style_bert_vits2.nlp.chinese.bert_feature import extract_bert_feature
    from style_bert_vits2.nlp.chinese.normalizer import normalize_text

    text = "啊!但是《原神》是由,米哈游自主,  [研发]的一款全.新开放世界.冒险游戏"
    text = normalize_text(text)
    print(text)
    phones, tones, word2ph = g2p(text)
    # NOTE(review): requires a CUDA device; presumably extract_bert_feature
    # returns a tensor whose width follows sum(word2ph) — confirm upstream.
    bert = extract_bert_feature(text, word2ph, "cuda")

    print(phones, tones, word2ph, bert.shape)


# Example usage
# text = "这是一个示例文本:,你好!这是一个测试...."
# print(g2p_paddle(text))  # Output: 这是一个示例文本你好这是一个测试
# NOTE(review): g2p_paddle is not defined in this file — stale example,
# presumably carried over from a paddle-based variant; likely means g2p.