# Spaces: hon9kon9ize/tts (Running) - File size: 7,444 Bytes

from text.symbols import punctuation
import re
import unicodedata
import cn2an
import pycantonese
import jieba
import csv


# Register the custom user dictionary with jieba before any segmentation runs.
jieba.load_userdict("./text/yue_dict.txt")

# word -> list of jyutping strings, in the order they appear in the CSV file.
jyutping_dict = {}

with open("./text/jyutping.csv", "r", encoding="utf-8") as f:
    for line in f:
        line = line.strip()
        if line:
            # Each non-blank row must be exactly "word,jyutping".
            word, jyutping = line.split(",")
            jyutping_dict.setdefault(word, []).append(jyutping)


def normalizer(x):
    """Return *x* after running it through ``cn2an.transform`` in ``"an2cn"`` mode.

    Per the cn2an documentation, that mode rewrites Arabic numerals found in
    the sentence as Chinese numerals.
    """
    return cn2an.transform(x, "an2cn")


def word2jyutping(word):
    """Convert the CJK ideographs of *word* to a space-separated jyutping string.

    Characters whose Unicode name does not start with "CJK UNIFIED IDEOGRAPH"
    are skipped. Each remaining character is looked up on its own through
    ``pycantonese.characters_to_jyutping``.

    Args:
        word: The text to convert, processed character by character.

    Returns:
        The jyutping syllables joined by single spaces; an empty string when
        *word* contains no CJK unified ideograph.

    Raises:
        ValueError: If any ideograph has no jyutping (the lookup gave None).
    """
    jyutpings = [
        pycantonese.characters_to_jyutping(char)[0][1]
        for char in word
        if unicodedata.name(char, "").startswith("CJK UNIFIED IDEOGRAPH")
    ]

    # Check for failed lookups BEFORE touching the values with a regex:
    # re.search(pattern, None) would raise TypeError and hide the real error.
    if None in jyutpings:
        raise ValueError(f"Failed to convert {word} to jyutping: {jyutpings}")

    # la1 -> laa1, ga1 -> gaa1: a bare "a" final after l/g is rewritten to the
    # long "aa" final.
    normalized = [
        syllable.replace('a', 'aa')
        if re.search(r"^(la|ga)[1-6]$", syllable)
        else syllable
        for syllable in jyutpings
    ]

    return " ".join(normalized)


# Initials accepted by parse_jyutping. The empty string stands for a syllable
# with no initial consonant (including the syllabic "m" / "ng" syllables).
INITIALS = ["", "b", "c", "d", "f", "g", "gw", "h", "j",
            "k", "kw", "l", "m", "n", "ng", "p", "s", "t", "w", "z"]
# Finals accepted by parse_jyutping; any other final makes it raise ValueError.
# The trailing "m" and "ng" entries are the finals used for syllabic nasals.
FINALS = ["aa", "aai", "aau", "aam", "aan", "aang", "aap", "aat", "aak", "ai", "au", "am", "an", "ang", "ap", "at", "ak", "e", "ei", "eu", "em", "eng", "ep", "ek", "i", "iu", "im",
          "in", "ing", "ip", "it", "ik", "o", "oi", "ou", "on", "ong", "ot", "ok", "oe", "oeng", "oek", "eoi", "eon", "eot", "u", "ui", "un", "ung", "ut", "uk", "yu", "yun", "yut", "m", "ng"]

# Punctuation substitution table used by replace_punctuation: every occurrence
# of a key in the input is replaced by its value. Full-width and variant marks
# collapse onto a small ASCII-like set; every kind of quote and bracket becomes
# "'"; dashes, tildes and underscores become "-"; newline and "$" become ".".
# Keys are joined into one regex alternation in insertion order, so keep
# multi-character keys (e.g. "...") ahead of any key that is a prefix of them.
rep_map = {
    "：": ",",
    "︰": ",",
    "；": ",",
    "，": ",",
    "﹐": ",",
    "。": ".",
    "！": "!",
    "？": "?",
    "﹖": "?",
    "﹗": "!",
    "\n": ".",
    "·": ",",
    "、": ",",
    "丶": ",",
    "...": "…",
    "⋯": "…",
    "$": ".",
    "“": "'",
    "”": "'",
    '"': "'",
    "‘": "'",
    "’": "'",
    "（": "'",
    "）": "'",
    "(": "'",
    ")": "'",
    "《": "'",
    "》": "'",
    "【": "'",
    "】": "'",
    "[": "'",
    "]": "'",
    "—": "-",
    "～": "-",
    "~": "-",
    "「": "'",
    "」": "'",
    "_": "-",
}

# Character substitution table used by replace_chars: each key is replaced by
# its value via str.replace, in insertion order.
# NOTE(review): several keys render identically to their values; presumably the
# keys are CJK compatibility / variant code points being mapped to the unified
# ideograph. Verify the actual code points before editing, and do not retype
# these entries by hand.
# NOTE(review): the "\n" and dash keys appear to also be keys of rep_map, and
# text_normalize runs replace_punctuation first, so within that pipeline these
# two entries look unreachable; confirm before relying on them.
replacement_chars = {
    "\n": " ",
    'ㄧ': '一',
    '—': '一',
    '更': '更',
    '不': '不',
    '料': '料',
    '聯': '聯',
    '行': '行',
    '利': '利',
    '謢': '護',
    '岀': '出',
    '鎭': '鎮',
    '戯': '戲',
    '旣': '既',
    '立': '立',
    '來': '來',
    '年': '年',
    '㗇': '蝦',
}


def replace_punctuation(text):
    """Map punctuation through ``rep_map`` and drop unsupported characters.

    Every occurrence of a ``rep_map`` key is substituted with its value.
    Afterwards only CJK unified ideographs and members of ``punctuation``
    survive; everything else (Latin letters, digits, spaces, ...) is removed.
    """
    alternation = "|".join(map(re.escape, rep_map.keys()))
    substituted = re.compile(alternation).sub(
        lambda match: rep_map[match.group()], text)

    kept = []
    for char in substituted:
        is_ideograph = unicodedata.name(char, "").startswith(
            "CJK UNIFIED IDEOGRAPH")
        if is_ideograph or char in punctuation:
            kept.append(char)

    return "".join(kept)


def replace_chars(text):
    """Apply every ``replacement_chars`` substitution to *text*, in table order."""
    result = text
    for source in replacement_chars:
        result = result.replace(source, replacement_chars[source])
    return result


def word_segmentation(text):
    """Segment *text* with ``jieba.cut`` and return its result unchanged."""
    return jieba.cut(text)


def text_normalize(text):
    """Normalize raw input text for the jyutping pipeline.

    Steps, in order: strip surrounding whitespace, run ``normalizer``, run
    ``replace_punctuation``, then run ``replace_chars``.
    """
    stripped = text.strip()
    return replace_chars(replace_punctuation(normalizer(stripped)))


def jyuping_to_initials_finals_tones(jyuping_syllables):
    """Expand jyutping tokens into parallel phone and tone sequences.

    A punctuation token contributes one phone (itself) with tone 0. Any other
    token is parsed by ``parse_jyutping`` and contributes two phones, its
    initial and its final, both carrying the syllable's tone.

    Returns:
        A ``(phones, tones, word2ph)`` tuple of lists, where ``word2ph`` holds
        the number of phones produced for each input token (1 or 2).
    """
    phones, tone_ids, phones_per_token = [], [], []

    for token in jyuping_syllables:
        if token not in punctuation:
            initial, final, tone = parse_jyutping(token)
            phones += [initial, final]
            tone_ids += [tone, tone]
            phones_per_token.append(2)
            continue

        # Punctuation passes through as a single toneless phone.
        phones.append(token)
        tone_ids.append(0)
        phones_per_token.append(1)

    assert len(phones) == len(tone_ids)
    return phones, tone_ids, phones_per_token


# Whole-text override table consulted first by get_jyutping: maps a text to a
# space-separated jyutping string. It is left empty here because the loader
# below is disabled.
wordshk_juytping = {}

# Disabled loader (kept for reference): it filled the table from a CSV file,
# keyed by text_normalize(column 0) with column 1 as the jyutping string.
# with open("/notebooks/bert-vits2/Bert-VITS2-Cantonese/wordshk_juytping.csv", "r") as csv_file:
#     csv_reader = csv.reader(csv_file, delimiter=',')

#     for row in csv_reader:
#         wordshk_juytping[text_normalize(row[0])] = row[1]


def get_jyutping(text):
    """Convert normalized *text* into a flat list of jyutping and punctuation tokens.

    A text present in ``wordshk_juytping`` is answered straight from that
    table. Otherwise the text is segmented; words made only of punctuation are
    emitted one mark at a time, and every other word is converted through
    ``jyutping_dict`` (first reading) or, failing that, ``word2jyutping``.

    Raises:
        ValueError: If a word's conversion does not look like one or more
            jyutping syllables (lowercase letters followed by tone digits).
    """
    if text in wordshk_juytping:
        return wordshk_juytping[text].split(" ")

    words = word_segmentation(text)
    tokens = []
    punct_pattern = re.compile(
        r"^[{}]+$".format(re.escape("".join(punctuation))))

    for word in words:
        if punct_pattern.match(word):
            # Break a run of punctuation into its individual marks.
            pieces = re.split(r"([{}])".format(
                re.escape("".join(punctuation))), word)
            tokens.extend(piece for piece in pieces if len(piece) > 0)
            continue

        # Prefer the dictionary's first reading; otherwise convert per character.
        jyutpings = (jyutping_dict[word][0] if word in jyutping_dict
                     else word2jyutping(word))

        # match multiple jyutping eg: liu4 ge3, or single jyutping eg: liu4
        if not re.search(r"^([a-z]+[1-6]+[ ]?)+$", jyutpings):
            raise ValueError(
                f"Failed to convert {word} to jyutping: {jyutpings}")

        tokens.extend(jyutpings.split(" "))

    return tokens


def get_bert_feature(text, word2ph):
    """Delegate to ``text.cantonese_bert.get_bert_feature`` and return its result.

    The import is done inside the function, so ``text.cantonese_bert`` is only
    loaded when features are actually requested.
    """
    from text import cantonese_bert as bert_backend

    features = bert_backend.get_bert_feature(text, word2ph)
    return features


def parse_jyutping(jyutping):
    """Split one jyutping syllable into its initial, final and tone.

    Args:
        jyutping: A single syllable ending in a tone digit, e.g. ``"gwong2"``.

    Returns:
        A list ``[init, final, tone]`` where ``init`` is a member of
        ``INITIALS`` (possibly ``""``), ``final`` is a member of ``FINALS``
        and ``tone`` is the trailing digit as an int.

    Raises:
        ValueError: If the string is shorter than two characters, does not end
            with a digit, or its final is not in ``FINALS``.
    """
    orig_jyutping = jyutping

    if len(jyutping) < 2:
        raise ValueError(f"Jyutping string too short: {jyutping}")

    init = ""
    if jyutping.startswith("ng") and len(jyutping) == 3:
        # Syllabic "ng" + tone: the whole "ng" is the final, no initial.
        pass
    elif jyutping[0] == "m" and len(jyutping) == 2:
        # Syllabic "m" + tone: likewise no initial.
        pass
    elif jyutping.startswith(("ng", "gw", "kw")):
        # Two-letter initials must be tried before the single-letter ones.
        init = jyutping[:2]
        jyutping = jyutping[2:]
    elif jyutping[0] in "bpmfdtnlgkhwzcsj":
        init = jyutping[0]
        jyutping = jyutping[1:]

    try:
        tone = int(jyutping[-1])
    except (ValueError, IndexError) as err:
        # ValueError: last character is not a digit.
        # IndexError: nothing is left after the initial (e.g. "ng", "gw").
        raise ValueError(
            f"Jyutping string does not end with a tone number, in {orig_jyutping}"
        ) from err
    final = jyutping[:-1]

    # Internal invariant: every branch above yields a member of INITIALS.
    assert init in INITIALS, f"Invalid initial: {init}, in {orig_jyutping}"

    if final not in FINALS:
        raise ValueError(f"Invalid final: {final}, in {orig_jyutping}")

    return [init, final, tone]


def g2p(text):
    """Convert normalized *text* to padded phone, tone and word2ph lists.

    The text goes through ``get_jyutping`` and then
    ``jyuping_to_initials_finals_tones``. Each resulting list is wrapped with
    one boundary entry on either side: ``"_"`` for phones, ``0`` for tones and
    ``1`` for word2ph.
    """
    syllables = get_jyutping(text)
    phones, tones, word2ph = jyuping_to_initials_finals_tones(syllables)
    return ["_", *phones, "_"], [0, *tones, 0], [1, *word2ph, 1]


if __name__ == "__main__":
    # Manual smoke test: normalize one sample sentence, run g2p on it, then
    # request BERT features for the result.
    # NOTE: this import rebinds the module-level get_bert_feature wrapper to
    # the function from text.cantonese_bert for the rest of the script.
    from text.cantonese_bert import get_bert_feature

    # Sample inputs; only the uncommented assignment is used.
    # text = "Apple BB 你點解會咁柒㗎？我真係唔該晒你呀！123"
    text = "佢邊係想辭工吖，跳下草裙舞想加人工之嘛。"
    # text = "我個 app 嘅介紹文想由你寫，因為我唔知從一般用家角度要細緻到乜程度"
    # text = "佢哋最叻咪就係去㗇人傷害人,得個殼咋!"
    text = text_normalize(text)
    print('normalized text', text)
    phones, tones, word2ph = g2p(text)
    print(phones, tones, word2ph)
    # The normalized text and word2ph are passed together to the BERT helper.
    bert = get_bert_feature(text, word2ph)
    # Only the shape of the returned feature object is printed.
    print(bert.shape)