zhzluke96
update
f367757
import gc
import logging
import threading
import torch
from transformers import LlamaTokenizer
from modules import config
from modules.ChatTTS import ChatTTS
from modules.devices import devices
logger = logging.getLogger(__name__)
chat_tts = None
lock = threading.Lock()
def load_chat_tts_in_thread():
global chat_tts
if chat_tts:
return
logger.info("Loading ChatTTS models")
chat_tts = ChatTTS.Chat()
device = devices.get_device_for("chattts")
dtype = devices.dtype
chat_tts.load_models(
compile=config.runtime_env_vars.compile,
source="local",
local_path="./models/ChatTTS",
device=device,
dtype=dtype,
dtype_vocos=devices.dtype_vocos,
dtype_dvae=devices.dtype_dvae,
dtype_gpt=devices.dtype_gpt,
dtype_decoder=devices.dtype_decoder,
)
# 如果 device 为 cpu 同时,又是 dtype == float16 那么报 warn
# 提示可能无法正常运行,建议使用 float32 即开启 `--no_half` 参数
if device == devices.cpu and dtype == torch.float16:
logger.warning(
"The device is CPU and dtype is float16, which may not work properly. It is recommended to use float32 by enabling the `--no_half` parameter."
)
devices.torch_gc()
logger.info("ChatTTS models loaded")
def load_chat_tts():
with lock:
if chat_tts is None:
load_chat_tts_in_thread()
if chat_tts is None:
raise Exception("Failed to load ChatTTS models")
return chat_tts
def unload_chat_tts():
logging.info("Unloading ChatTTS models")
global chat_tts
if chat_tts:
for model_name, model in chat_tts.pretrain_models.items():
if isinstance(model, torch.nn.Module):
model.cpu()
del model
chat_tts = None
devices.torch_gc()
gc.collect()
logger.info("ChatTTS models unloaded")
def reload_chat_tts():
logging.info("Reloading ChatTTS models")
unload_chat_tts()
instance = load_chat_tts()
logger.info("ChatTTS models reloaded")
return instance
def get_tokenizer() -> LlamaTokenizer:
chat_tts = load_chat_tts()
tokenizer = chat_tts.pretrain_models["tokenizer"]
return tokenizer