# formosan-tts / ipa/__init__.py
# Last commit: c4d001b ("feat: add app.py"), txya900619
# File size: 1.7 kB
import csv
from io import BytesIO
import requests
from omegaconf import OmegaConf
# Fallback grapheme -> phoneme pairs. load_g2p() copies a pair into a
# language's table only when that table has no entry for the grapheme yet, so
# these never override a mapping that came from the CSV.
# Kept as an ordered sequence of pairs: the order below is the order in which
# missing graphemes get appended to each table.
EXTRA_G2P = dict(
    (
        ("z", "z"),
        ("o", "o"),
        ("h", "h"),
        ("g", "g"),
        ("y", "j"),
        ("w", "w"),
        ("c", "ʦ"),
        ("u", "u"),
        ("f", "f"),
        ("v", "v"),
        ("j", "ɟ"),
        ("b", "b"),
        ("q", "q"),
        ("e", "e"),
        (",", ","),
    )
)
def gh_download(repo, path, token, timeout=30):
    """Fetch the raw text of one file through the GitHub contents API.

    Args:
        repo: Repository identifier interpolated into the API URL after
            ``/repos/`` (GitHub expects ``owner/name``).
        path: Path of the file inside the repository.
        token: GitHub token, sent as a ``Bearer`` credential.
        timeout: Seconds ``requests`` may wait for the connection and for
            each chunk of the response before giving up. Without it a
            stalled connection would block forever.

    Returns:
        The response body as text, decoded as UTF-8 with a leading
        byte-order mark (if any) stripped.

    Raises:
        Exception: If the API answers with any status other than 200.
        requests.exceptions.RequestException: On network failure or timeout.
    """
    headers = {
        "Authorization": f"Bearer {token}",
        # The "raw" media type makes the API return the file content itself
        # rather than a JSON envelope.
        "Accept": "application/vnd.github.raw+json",
    }

    url = f"https://api.github.com/repos/{repo}/contents/{path}"
    response = requests.get(url, headers=headers, timeout=timeout)

    if response.status_code != 200:
        raise Exception(f"Failed to download {path} from {repo}, response: {response}")

    # "utf-8-sig" decodes UTF-8 and drops a BOM, which would otherwise end up
    # glued to the first character of the returned text.
    response.encoding = "utf-8-sig"

    return response.text
def load_g2p(g2p_string):
    """Parse a grapheme-to-phoneme CSV into one lookup table per language tag.

    The first line of ``g2p_string`` is the CSV header. It must contain the
    columns ``Language`` and ``Dialect``; every other column name is treated
    as a grapheme, and the cell under it holds that grapheme's phoneme(s).

    For each data row:

    * the tag is ``Language`` alone when ``Dialect`` is ``"-"``, otherwise
      ``Language_Dialect``;
    * a cell equal to ``"-"`` means the grapheme is not mapped and is skipped;
    * when a cell holds several comma-separated values, only the first is kept;
    * every ``EXTRA_G2P`` pair whose grapheme is still missing is added.

    Rows that share a tag are merged into the same table, with later rows
    overwriting earlier values.

    Args:
        g2p_string: The whole CSV document as a single string.

    Returns:
        A dict mapping each language tag to a ``{grapheme: phoneme}`` dict.

    Raises:
        KeyError: If the header lacks a ``Language`` or ``Dialect`` column.
    """
    g2p = {}
    meta_columns = ("Language", "Dialect")

    for row in csv.DictReader(g2p_string.split("\n")):
        language = row["Language"]
        dialect = row["Dialect"]

        if dialect == "-":
            lang_tag = f"{language}"
        else:
            lang_tag = f"{language}_{dialect}"

        # Create the table before looking at any cell: a row whose grapheme
        # columns are all "-" must still receive the fallback mappings below
        # instead of failing with a KeyError.
        table = g2p.setdefault(lang_tag, {})

        for grapheme, cell in row.items():
            if grapheme in meta_columns:
                continue

            # DictReader files surplus cells of an over-long row under the
            # key None, and fills the missing cells of a short row with None.
            # Neither carries a usable mapping, so treat both like "-".
            if grapheme is None or cell is None or cell == "-":
                continue

            table[grapheme] = cell.split(",")[0]

        # Fill in fallback pairs without overriding anything read from the CSV.
        for g, p in EXTRA_G2P.items():
            if g not in table:
                table[g] = p

    return g2p
# Make both helpers callable from OmegaConf configs under their own names.
# Registration has to happen before the config below is materialised;
# presumably configs/g2p.yaml refers to them through ${gh_download:...} and
# ${load_g2p:...} interpolations -- confirm against that file.
OmegaConf.register_new_resolver("gh_download", gh_download)
OmegaConf.register_new_resolver("load_g2p", load_g2p)

# The path is relative, so it is resolved against the current working
# directory at import time.
_g2p_config = OmegaConf.load("configs/g2p.yaml")

# Convert to plain Python containers and keep only the "g2p" entry.
g2p = OmegaConf.to_object(_g2p_config)["g2p"]