File size: 1,290 Bytes
c4d001b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import json
import os
import tempfile

import torch
from huggingface_hub import snapshot_download
from omegaconf import OmegaConf
from TTS.utils.synthesizer import Synthesizer


def load_model(model_id):
    """Download a model snapshot and build a ``Synthesizer`` from it.

    Every occurrence of the names ``speakers.pth``, ``language_ids.json`` and
    ``speaker_embs.pth`` in the snapshot's ``config.json`` is replaced with
    the corresponding path inside the downloaded snapshot directory. The
    patched config is written to a fresh temporary ``.json`` file, which is
    passed to the synthesizer together with the snapshot's ``model.pth``.

    Args:
        model_id: Repository id forwarded to ``snapshot_download``.

    Returns:
        A ``Synthesizer`` constructed from the checkpoint and the patched
        config, with ``use_cuda`` set from ``torch.cuda.is_available()``.
    """
    model_dir = snapshot_download(model_id)
    config_file_path = os.path.join(model_dir, "config.json")
    model_ckpt_path = os.path.join(model_dir, "model.pth")

    with open(config_file_path, "r", encoding="utf-8") as f:
        content = f.read()

    # Same substitution order as before: speakers, language ids, embeddings.
    for file_name in ("speakers.pth", "language_ids.json", "speaker_embs.pth"):
        absolute_path = os.path.join(model_dir, file_name)
        # The path is spliced into raw JSON text, so it must be escaped the
        # way a JSON string body is; otherwise backslashes or quotes in the
        # path would corrupt the config. json.dumps adds surrounding quotes,
        # which are stripped because the name is already inside a string.
        content = content.replace(file_name, json.dumps(absolute_path)[1:-1])

    # A unique temp file per call, so loading several models does not
    # overwrite one shared "temp_config.json" in the working directory.
    # The ".json" suffix is kept from the original file name.
    # NOTE(review): the file is deliberately not deleted, matching the old
    # behaviour of leaving the patched config on disk; confirm whether the
    # synthesizer still needs it after construction before adding cleanup.
    with tempfile.NamedTemporaryFile(
        mode="w",
        suffix=".json",
        prefix="tts_config_",
        delete=False,
        encoding="utf-8",
    ) as f:
        f.write(content)
        temp_config_path = f.name

    return Synthesizer(
        tts_checkpoint=model_ckpt_path,
        tts_config_path=temp_config_path,
        use_cuda=torch.cuda.is_available(),
    )


# Make load_model available to OmegaConf under the resolver name "load_model".
# NOTE(review): presumably configs/models.yaml refers to it through
# ${load_model:...} interpolations - confirm against that file.
OmegaConf.register_new_resolver(name="load_model", resolver=load_model)

# Read the models YAML, then convert the loaded config with
# OmegaConf.to_object; the result is exposed as models_config.
_models_yaml = OmegaConf.load("configs/models.yaml")
models_config = OmegaConf.to_object(_models_yaml)