Spaces:

united-link
/

formosan-tts

Sleeping

formosan-tts / ipa /__init__.py

feat: add app.py

c4d001b 2 months ago

1.7 kB

	import csv
	from io import BytesIO

	import requests
	from omegaconf import OmegaConf

	# Fallback symbol table: load_g2p adds each of these entries to a language's
	# table only when that table has no entry for the key. Keys look like
	# orthographic letters and values like IPA symbols (presumably; the mapping
	# itself does not say).
	#
	# Most symbols map to themselves, so start from an identity mapping ...
	EXTRA_G2P = {symbol: symbol for symbol in "zohgywcufvjbqe,"}
	# ... then override the three whose output differs from the input.
	# Updating an existing key keeps its position, so iteration order is still
	# z, o, h, g, y, w, c, u, f, v, j, b, q, e, ",".
	EXTRA_G2P.update({"y": "j", "c": "ʦ", "j": "ɟ"})


	def gh_download(repo, path, token, timeout=30):
	    """Download a file's raw text through the GitHub contents API.

	    Args:
	        repo: Repository identifier interpolated into
	            ``https://api.github.com/repos/{repo}/contents/{path}``
	            (presumably ``"owner/name"``).
	        path: Path of the file inside the repository.
	        token: Token sent as ``Authorization: Bearer <token>``. When falsy
	            (``None`` or ``""``) no Authorization header is sent, instead of
	            sending a malformed credential.
	        timeout: Seconds handed to ``requests.get`` so a stalled connection
	            cannot block the caller forever. Defaults to 30.

	    Returns:
	        The response body decoded as UTF-8, with a leading byte-order mark
	        removed if one is present.

	    Raises:
	        RuntimeError: If the server answers with any status other than 200.
	        requests.RequestException: Propagated unchanged from ``requests`` on
	            connection failures or timeouts.
	    """
	    # The "raw" media type asks GitHub for the file content itself rather
	    # than a JSON description of it.
	    headers = {"Accept": "application/vnd.github.raw+json"}
	    if token:
	        headers["Authorization"] = f"Bearer {token}"

	    url = f"https://api.github.com/repos/{repo}/contents/{path}"
	    response = requests.get(url, headers=headers, timeout=timeout)
	    if response.status_code != 200:
	        raise RuntimeError(
	            f"Failed to download {path} from {repo}, response: {response}"
	        )

	    # "utf-8-sig" drops a BOM so it cannot end up glued to the first field
	    # of the downloaded text.
	    response.encoding = "utf-8-sig"
	    return response.text


	def load_g2p(g2p_string):
	    """Build per-language symbol tables from CSV text.

	    The CSV must have ``Language`` and ``Dialect`` columns. Every other
	    column header is used as a table key and the cell as its value.

	    Args:
	        g2p_string: CSV content as a single string, header row first.

	    Returns:
	        A dict mapping a language tag to a ``{column header: value}`` dict.
	        The tag is ``Language`` alone when ``Dialect`` is ``"-"``, otherwise
	        ``"<Language>_<Dialect>"``. Cells equal to ``"-"`` are skipped; for
	        other cells only the text before the first comma is kept. Every
	        table is then topped up with the ``EXTRA_G2P`` entries it lacks.

	    Raises:
	        KeyError: If a row has no ``Language`` or ``Dialect`` column.
	    """
	    meta_columns = ("Language", "Dialect")
	    g2p = {}

	    for row in csv.DictReader(g2p_string.split("\n")):
	        language = row["Language"]
	        dialect = row["Dialect"]
	        lang_tag = language if dialect == "-" else f"{language}_{dialect}"

	        # Create the table up front: a row whose cells are all "-" must
	        # still get the fallback entries instead of raising KeyError below.
	        table = g2p.setdefault(lang_tag, {})

	        for key, value in row.items():
	            if key in meta_columns:
	                continue
	            # DictReader stores surplus cells under the key None and pads
	            # short rows with None values; neither is a usable entry.
	            if key is None or value is None or value == "-":
	                continue
	            # A cell may list several comma-separated alternatives; only
	            # the first one is used.
	            table[key] = value.split(",")[0]

	        # NOTE(review): the source's indentation was lost; the fallback is
	        # applied once per row here, which covers every language tag.
	        for grapheme, phoneme in EXTRA_G2P.items():
	            table.setdefault(grapheme, phoneme)

	    return g2p


	# Make both helpers callable from OmegaConf configs under their own names.
	# They are registered before the config is converted below; presumably
	# configs/g2p.yaml refers to them (verify against that file).
	OmegaConf.register_new_resolver(name="gh_download", resolver=gh_download)
	OmegaConf.register_new_resolver(name="load_g2p", resolver=load_g2p)

	# Load the config at import time and expose its "g2p" entry as plain
	# Python objects.
	g2p = OmegaConf.to_object(
	    OmegaConf.load("configs/g2p.yaml")
	)["g2p"]