Spaces:
Running
Running
import gc | |
import os | |
from contextlib import contextmanager | |
from time import time | |
from typing import Optional | |
import streamlit as st | |
from tortoise.api import TextToSpeech | |
from tortoise.utils.audio import load_voices | |
@contextmanager
def timeit(desc=""):
    """Context manager that prints the wall-clock duration of its body.

    Args:
        desc: Label placed at the start of the printed message.

    Yields:
        None; the value exists only to hand control to the ``with`` body.
    """
    start = time()
    try:
        yield
    finally:
        # Report the elapsed time even when the timed block raises; the
        # exception itself still propagates to the caller.
        print(f"{desc} took {time() - start:.2f} seconds")
def load_model(
    model_dir,
    high_vram,
    kv_cache,
    ar_checkpoint,
    diff_checkpoint,
):
    """Build and return a ``TextToSpeech`` instance.

    Each argument is forwarded unchanged to the ``TextToSpeech``
    constructor; ``model_dir`` is passed under the keyword ``models_dir``.

    Returns:
        The newly constructed ``TextToSpeech`` object.
    """
    # Run a garbage-collection pass before constructing the model.
    # NOTE(review): presumably this releases a previously loaded model
    # before the new one is allocated -- confirm against the caller.
    gc.collect()

    tts_options = {
        "models_dir": model_dir,
        "high_vram": high_vram,
        "kv_cache": kv_cache,
        "ar_checkpoint": ar_checkpoint,
        "diff_checkpoint": diff_checkpoint,
    }
    return TextToSpeech(**tts_options)
def list_voices(extra_voices_dir: Optional[str]):
    """List the selectable voice names and the extra voice directories.

    The result always starts with ``"random"``, followed by the voices
    found in ``extra_voices_dir`` (when it is an existing directory) and
    then the voices found in ``tortoise/voices`` (relative to the current
    working directory), excluding ``cond_latent_example``.

    Only sub-directories are reported, and each group is sorted by name,
    because ``os.listdir`` returns entries in arbitrary order and also
    returns plain files.
    NOTE(review): this assumes every voice is stored as a directory, which
    is how ``load_voices`` appears to resolve names -- confirm.

    Args:
        extra_voices_dir: Optional path to a directory of additional
            voices. Ignored when falsy or not an existing directory.

    Returns:
        A ``(voices, extra_voices_ls)`` tuple: the list of voice names and
        a list holding ``extra_voices_dir`` when it was used, else empty.

    Raises:
        OSError: If ``tortoise/voices`` cannot be listed (for example
            ``FileNotFoundError`` when it does not exist).
    """

    def _voice_dirs(root):
        # Keep only sub-directories so stray files are not offered as voices.
        return sorted(
            entry
            for entry in os.listdir(root)
            if os.path.isdir(os.path.join(root, entry))
        )

    voices = ["random"]
    extra_voices_ls = []
    if extra_voices_dir and os.path.isdir(extra_voices_dir):
        voices.extend(_voice_dirs(extra_voices_dir))
        extra_voices_ls.append(extra_voices_dir)

    voices.extend(
        v for v in _voice_dirs("tortoise/voices") if v != "cond_latent_example"
    )
    return voices, extra_voices_ls
def load_voice_conditionings(voice, extra_voices_ls):
    """Load the samples and conditioning latents for ``voice``.

    Delegates to ``load_voices(voice, extra_voices_ls)`` and unpacks its
    result into exactly two values.

    Returns:
        A ``(voice_samples, conditioning_latents)`` tuple as produced by
        ``load_voices``.
    """
    loaded = load_voices(voice, extra_voices_ls)
    samples, latents = loaded
    return samples, latents