Spaces:
Sleeping
Sleeping
import csv | |
from io import BytesIO | |
import requests | |
from omegaconf import OmegaConf | |
# Default entries that load_g2p copies into every language's mapping whenever
# the loaded table does not define the key itself (G2P presumably stands for
# grapheme-to-phoneme -- confirm with the data owners).
# Most symbols map to themselves; the three overrides listed afterwards replace
# the identity value in place, so the key order stays as written in the string.
EXTRA_G2P = {
    **{symbol: symbol for symbol in "zohgywcufvjbqe,"},
    "y": "j",
    "c": "ʦ",
    "j": "ɟ",
}
def gh_download(repo, path, token, timeout=30):
    """Download a file from a GitHub repository and return it as text.

    The request goes to the GitHub REST "contents" endpoint with the raw media
    type in the ``Accept`` header, so the response body is the file content
    itself rather than a JSON description of it.

    Args:
        repo: Repository identifier interpolated into the API URL after
            ``/repos/`` (the GitHub API expects ``owner/name``).
        path: Path of the file inside the repository.
        token: Access token sent as a ``Bearer`` credential.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response body decoded as UTF-8, with a leading byte-order mark
        removed if one is present.

    Raises:
        RuntimeError: If the server answers with any status other than 200.
        requests.exceptions.RequestException: If the request itself fails,
            including when ``timeout`` expires.
    """
    headers = {
        "Authorization": f"Bearer {token}",
        "Accept": "application/vnd.github.raw+json",
    }
    url = f"https://api.github.com/repos/{repo}/contents/{path}"
    # requests never times out on its own; without an explicit limit a stalled
    # connection would block the caller indefinitely.
    response = requests.get(url, headers=headers, timeout=timeout)
    if response.status_code != 200:
        raise RuntimeError(
            f"Failed to download {path} from {repo}, response: {response}"
        )
    # "utf-8-sig" decodes UTF-8 and drops a leading BOM, which would otherwise
    # end up glued to the first character of the text.
    response.encoding = "utf-8-sig"
    return response.text
def load_g2p(g2p_string):
    """Parse a CSV table into one symbol mapping per language tag.

    The CSV needs a ``Language`` and a ``Dialect`` column. Every other column
    header becomes a key, and the cell below it holds that key's value for the
    row's language. A cell containing ``-`` means "no entry". When a cell
    holds several comma-separated alternatives, only the first one is kept.
    After a row has been read, every ``EXTRA_G2P`` entry whose key is still
    missing from that language's mapping is added as a default.

    Args:
        g2p_string: The CSV text, header row first.

    Returns:
        A dict mapping each language tag to its ``{column header: value}``
        dict. The tag is ``Language`` when the dialect cell is ``-`` and
        ``Language_Dialect`` otherwise. Rows sharing a tag are merged, later
        rows overriding earlier ones.

    Raises:
        KeyError: If a row lacks the ``Language`` or ``Dialect`` column.
    """
    g2p = {}
    for row in csv.DictReader(g2p_string.split("\n")):
        language = row["Language"]
        dialect = row["Dialect"]
        lang_tag = language if dialect == "-" else f"{language}_{dialect}"

        # Create the mapping before looking at the cells: a row whose cells are
        # all "-" must still end up with the EXTRA_G2P defaults instead of
        # failing on a missing key.
        mapping = g2p.setdefault(lang_tag, {})

        for key, value in row.items():
            if key in ("Language", "Dialect"):
                continue
            # DictReader files surplus cells under the key None and fills
            # missing cells with the value None; neither carries an entry.
            if key is None or value is None or value == "-":
                continue
            mapping[key] = value.split(",")[0]

        for g, p in EXTRA_G2P.items():
            mapping.setdefault(g, p)
    return g2p
# Make the two helpers above available to OmegaConf as custom resolvers. This
# has to happen before the config is loaded and converted; configs/g2p.yaml
# presumably refers to them through ${gh_download:...} / ${load_g2p:...}
# interpolations -- confirm against the YAML file.
OmegaConf.register_new_resolver(name="gh_download", resolver=gh_download)
OmegaConf.register_new_resolver(name="load_g2p", resolver=load_g2p)

# Load the config, turn it into plain Python objects and keep its "g2p" entry
# as the module-level table.
g2p = OmegaConf.to_object(
    OmegaConf.load("configs/g2p.yaml")
)["g2p"]