|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from functools import lru_cache |
|
|
|
import sherpa_onnx |
|
from huggingface_hub import hf_hub_download |
|
|
|
|
|
def get_file( |
|
repo_id: str, |
|
filename: str, |
|
subfolder: str = ".", |
|
) -> str: |
|
model_filename = hf_hub_download( |
|
repo_id=repo_id, |
|
filename=filename, |
|
subfolder=subfolder, |
|
) |
|
return model_filename |
|
|
|
|
|
@lru_cache(maxsize=10) |
|
def _get_vits_vctk(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts: |
|
assert repo_id == "csukuangfj/vits-vctk" |
|
|
|
model = get_file( |
|
repo_id=repo_id, |
|
filename="vits-vctk.onnx", |
|
subfolder=".", |
|
) |
|
|
|
lexicon = get_file( |
|
repo_id=repo_id, |
|
filename="lexicon.txt", |
|
subfolder=".", |
|
) |
|
|
|
tokens = get_file( |
|
repo_id=repo_id, |
|
filename="tokens.txt", |
|
subfolder=".", |
|
) |
|
|
|
tts_config = sherpa_onnx.OfflineTtsConfig( |
|
model=sherpa_onnx.OfflineTtsModelConfig( |
|
vits=sherpa_onnx.OfflineTtsVitsModelConfig( |
|
model=model, |
|
lexicon=lexicon, |
|
tokens=tokens, |
|
length_scale=1.0 / speed, |
|
), |
|
provider="cpu", |
|
debug=True, |
|
num_threads=2, |
|
) |
|
) |
|
tts = sherpa_onnx.OfflineTts(tts_config) |
|
|
|
return tts |
|
|
|
|
|
@lru_cache(maxsize=10) |
|
def _get_vits_ljs(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts: |
|
assert repo_id == "csukuangfj/vits-ljs" |
|
|
|
model = get_file( |
|
repo_id=repo_id, |
|
filename="vits-ljs.onnx", |
|
subfolder=".", |
|
) |
|
|
|
lexicon = get_file( |
|
repo_id=repo_id, |
|
filename="lexicon.txt", |
|
subfolder=".", |
|
) |
|
|
|
tokens = get_file( |
|
repo_id=repo_id, |
|
filename="tokens.txt", |
|
subfolder=".", |
|
) |
|
|
|
tts_config = sherpa_onnx.OfflineTtsConfig( |
|
model=sherpa_onnx.OfflineTtsModelConfig( |
|
vits=sherpa_onnx.OfflineTtsVitsModelConfig( |
|
model=model, |
|
lexicon=lexicon, |
|
tokens=tokens, |
|
length_scale=1.0 / speed, |
|
), |
|
provider="cpu", |
|
debug=True, |
|
num_threads=2, |
|
) |
|
) |
|
tts = sherpa_onnx.OfflineTts(tts_config) |
|
|
|
return tts |
|
|
|
|
|
@lru_cache(maxsize=10) |
|
def _get_vits_piper(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts: |
|
n = len("vits-piper-") |
|
name = repo_id.split("/")[1][n:] |
|
|
|
model = get_file( |
|
repo_id=repo_id, |
|
filename=f"{name}.onnx", |
|
subfolder=".", |
|
) |
|
|
|
tokens = get_file( |
|
repo_id=repo_id, |
|
filename="tokens.txt", |
|
subfolder=".", |
|
) |
|
|
|
tts_config = sherpa_onnx.OfflineTtsConfig( |
|
model=sherpa_onnx.OfflineTtsModelConfig( |
|
vits=sherpa_onnx.OfflineTtsVitsModelConfig( |
|
model=model, |
|
lexicon="", |
|
data_dir="/tmp/espeak-ng-data", |
|
tokens=tokens, |
|
length_scale=1.0 / speed, |
|
), |
|
provider="cpu", |
|
debug=True, |
|
num_threads=2, |
|
) |
|
) |
|
tts = sherpa_onnx.OfflineTts(tts_config) |
|
|
|
return tts |
|
|
|
|
|
@lru_cache(maxsize=10) |
|
def _get_vits_zh_aishell3(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts: |
|
assert repo_id == "csukuangfj/vits-zh-aishell3" |
|
|
|
model = get_file( |
|
repo_id=repo_id, |
|
filename="vits-aishell3.onnx", |
|
subfolder=".", |
|
) |
|
|
|
lexicon = get_file( |
|
repo_id=repo_id, |
|
filename="lexicon.txt", |
|
subfolder=".", |
|
) |
|
|
|
tokens = get_file( |
|
repo_id=repo_id, |
|
filename="tokens.txt", |
|
subfolder=".", |
|
) |
|
|
|
rule_fst = get_file( |
|
repo_id=repo_id, |
|
filename="rule.fst", |
|
subfolder=".", |
|
) |
|
|
|
tts_config = sherpa_onnx.OfflineTtsConfig( |
|
model=sherpa_onnx.OfflineTtsModelConfig( |
|
vits=sherpa_onnx.OfflineTtsVitsModelConfig( |
|
model=model, |
|
lexicon=lexicon, |
|
tokens=tokens, |
|
length_scale=1.0 / speed, |
|
), |
|
provider="cpu", |
|
debug=True, |
|
num_threads=2, |
|
), |
|
rule_fsts=rule_fst, |
|
) |
|
tts = sherpa_onnx.OfflineTts(tts_config) |
|
|
|
return tts |
|
|
|
|
|
@lru_cache(maxsize=10) |
|
def _get_vits_hf(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts: |
|
if "fanchen" in repo_id or "vits-cantonese-hf-xiaomaiiwn" in repo_id: |
|
model = repo_id.split("/")[-1] |
|
elif "coqui" in repo_id: |
|
model = "model" |
|
else: |
|
model = repo_id.split("-")[-1] |
|
|
|
model = get_file( |
|
repo_id=repo_id, |
|
filename=f"{model}.onnx", |
|
subfolder=".", |
|
) |
|
|
|
lexicon = get_file( |
|
repo_id=repo_id, |
|
filename="lexicon.txt", |
|
subfolder=".", |
|
) |
|
|
|
tokens = get_file( |
|
repo_id=repo_id, |
|
filename="tokens.txt", |
|
subfolder=".", |
|
) |
|
|
|
if "coqui" not in repo_id: |
|
rule_fst = get_file( |
|
repo_id=repo_id, |
|
filename="rule.fst", |
|
subfolder=".", |
|
) |
|
else: |
|
rule_fst = "" |
|
|
|
tts_config = sherpa_onnx.OfflineTtsConfig( |
|
model=sherpa_onnx.OfflineTtsModelConfig( |
|
vits=sherpa_onnx.OfflineTtsVitsModelConfig( |
|
model=model, |
|
lexicon=lexicon, |
|
tokens=tokens, |
|
length_scale=1.0 / speed, |
|
), |
|
provider="cpu", |
|
debug=True, |
|
num_threads=2, |
|
), |
|
rule_fsts=rule_fst, |
|
) |
|
tts = sherpa_onnx.OfflineTts(tts_config) |
|
|
|
return tts |
|
|
|
|
|
@lru_cache(maxsize=10) |
|
def get_pretrained_model(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts: |
|
if repo_id in chinese_models: |
|
return chinese_models[repo_id](repo_id, speed) |
|
if repo_id in cantonese_models: |
|
return cantonese_models[repo_id](repo_id, speed) |
|
elif repo_id in english_models: |
|
return english_models[repo_id](repo_id, speed) |
|
elif repo_id in german_models: |
|
return german_models[repo_id](repo_id, speed) |
|
elif repo_id in spanish_models: |
|
return spanish_models[repo_id](repo_id, speed) |
|
elif repo_id in french_models: |
|
return french_models[repo_id](repo_id, speed) |
|
else: |
|
raise ValueError(f"Unsupported repo_id: {repo_id}") |
|
|
|
|
|
cantonese_models = { |
|
"csukuangfj/vits-cantonese-hf-xiaomaiiwn": _get_vits_hf, |
|
} |
|
|
|
chinese_models = { |
|
"csukuangfj/vits-zh-hf-theresa": _get_vits_hf, |
|
"csukuangfj/vits-zh-hf-eula": _get_vits_hf, |
|
"csukuangfj/vits-zh-hf-echo": _get_vits_hf, |
|
"csukuangfj/vits-zh-hf-bronya": _get_vits_hf, |
|
"csukuangfj/vits-zh-aishell3": _get_vits_zh_aishell3, |
|
"csukuangfj/vits-zh-hf-fanchen-wnj": _get_vits_hf, |
|
"csukuangfj/vits-zh-hf-fanchen-C": _get_vits_hf, |
|
"csukuangfj/vits-zh-hf-fanchen-ZhiHuiLaoZhe": _get_vits_hf, |
|
"csukuangfj/vits-zh-hf-fanchen-ZhiHuiLaoZhe_new": _get_vits_hf, |
|
"csukuangfj/vits-zh-hf-fanchen-unity": _get_vits_hf, |
|
"csukuangfj/vits-zh-hf-doom": _get_vits_hf, |
|
"csukuangfj/vits-zh-hf-zenyatta": _get_vits_hf, |
|
"csukuangfj/vits-zh-hf-abyssinvoker": _get_vits_hf, |
|
"csukuangfj/vits-zh-hf-keqing": _get_vits_hf, |
|
|
|
|
|
} |
|
|
|
english_models = { |
|
"csukuangfj/vits-vctk": _get_vits_vctk, |
|
"csukuangfj/vits-ljs": _get_vits_ljs, |
|
|
|
"csukuangfj/vits-coqui-en-vctk": _get_vits_hf, |
|
"csukuangfj/vits-coqui-en-ljspeech": _get_vits_hf, |
|
"csukuangfj/vits-coqui-en-ljspeech-neon": _get_vits_hf, |
|
|
|
"csukuangfj/vits-piper-en_US-amy-low": _get_vits_piper, |
|
"csukuangfj/vits-piper-en_US-amy-medium": _get_vits_piper, |
|
"csukuangfj/vits-piper-en_US-arctic-medium": _get_vits_piper, |
|
"csukuangfj/vits-piper-en_US-danny-low": _get_vits_piper, |
|
"csukuangfj/vits-piper-en_US-hfc_male-medium": _get_vits_piper, |
|
"csukuangfj/vits-piper-en_US-joe-medium": _get_vits_piper, |
|
"csukuangfj/vits-piper-en_US-kathleen-low": _get_vits_piper, |
|
"csukuangfj/vits-piper-en_US-kusal-medium": _get_vits_piper, |
|
"csukuangfj/vits-piper-en_US-l2arctic-medium": _get_vits_piper, |
|
"csukuangfj/vits-piper-en_US-lessac-low": _get_vits_piper, |
|
"csukuangfj/vits-piper-en_US-lessac-medium": _get_vits_piper, |
|
"csukuangfj/vits-piper-en_US-lessac-high": _get_vits_piper, |
|
"csukuangfj/vits-piper-en_US-libritts-high": _get_vits_piper, |
|
"csukuangfj/vits-piper-en_US-libritts_r-medium": _get_vits_piper, |
|
"csukuangfj/vits-piper-en_US-ryan-low": _get_vits_piper, |
|
"csukuangfj/vits-piper-en_US-ryan-medium": _get_vits_piper, |
|
"csukuangfj/vits-piper-en_US-ryan-high": _get_vits_piper, |
|
|
|
"csukuangfj/vits-piper-en_GB-alan-low": _get_vits_piper, |
|
"csukuangfj/vits-piper-en_GB-alan-medium": _get_vits_piper, |
|
"csukuangfj/vits-piper-en_GB-alba-medium": _get_vits_piper, |
|
"csukuangfj/vits-piper-en_GB-jenny_dioco-medium": _get_vits_piper, |
|
"csukuangfj/vits-piper-en_GB-northern_english_male-medium": _get_vits_piper, |
|
"csukuangfj/vits-piper-en_GB-semaine-medium": _get_vits_piper, |
|
"csukuangfj/vits-piper-en_GB-southern_english_female-low": _get_vits_piper, |
|
"csukuangfj/vits-piper-en_GB-vctk-medium": _get_vits_piper, |
|
} |
|
|
|
german_models = { |
|
"csukuangfj/vits-piper-de_DE-eva_k-x_low": _get_vits_piper, |
|
"csukuangfj/vits-piper-de_DE-karlsson-low": _get_vits_piper, |
|
"csukuangfj/vits-piper-de_DE-kerstin-low": _get_vits_piper, |
|
"csukuangfj/vits-piper-de_DE-pavoque-low": _get_vits_piper, |
|
"csukuangfj/vits-piper-de_DE-ramona-low": _get_vits_piper, |
|
"csukuangfj/vits-piper-de_DE-thorsten-low": _get_vits_piper, |
|
"csukuangfj/vits-piper-de_DE-thorsten-medium": _get_vits_piper, |
|
"csukuangfj/vits-piper-de_DE-thorsten-high": _get_vits_piper, |
|
"csukuangfj/vits-piper-de_DE-thorsten_emotional-medium": _get_vits_piper, |
|
} |
|
|
|
spanish_models = { |
|
"csukuangfj/vits-piper-es_ES-carlfm-x_low": _get_vits_piper, |
|
"csukuangfj/vits-piper-es_ES-davefx-medium": _get_vits_piper, |
|
"csukuangfj/vits-piper-es_ES-mls_10246-low": _get_vits_piper, |
|
"csukuangfj/vits-piper-es_ES-mls_9972-low": _get_vits_piper, |
|
"csukuangfj/vits-piper-es_ES-sharvard-medium": _get_vits_piper, |
|
"csukuangfj/vits-piper-es_MX-ald-medium": _get_vits_piper, |
|
} |
|
|
|
french_models = { |
|
|
|
|
|
"csukuangfj/vits-piper-fr_FR-upmc-medium": _get_vits_piper, |
|
"csukuangfj/vits-piper-fr_FR-siwis-low": _get_vits_piper, |
|
"csukuangfj/vits-piper-fr_FR-siwis-medium": _get_vits_piper, |
|
"csukuangfj/vits-piper-fr_FR-tjiho-model1": _get_vits_piper, |
|
"csukuangfj/vits-piper-fr_FR-tjiho-model2": _get_vits_piper, |
|
"csukuangfj/vits-piper-fr_FR-tjiho-model3": _get_vits_piper, |
|
} |
|
|
|
language_to_models = { |
|
"English": list(english_models.keys()), |
|
"Chinese (Mandarin, 普通话)": list(chinese_models.keys()), |
|
"Cantonese (粤语)": list(cantonese_models.keys()), |
|
"German": list(german_models.keys()), |
|
"Spanish": list(spanish_models.keys()), |
|
"French": list(french_models.keys()), |
|
} |
|
|