VTTS-speechT5 / src /speechbrain_speaker_embedding.py
linh-truong's picture
init
5c60553
raw
history blame
597 Bytes
import torch
import os
from speechbrain.pretrained import EncoderClassifier
# Identifier handed to ``EncoderClassifier.from_hparams`` as ``source``; it is
# also reused below to build the local cache directory.
# NOTE(review): the name suggests an x-vector speaker-recognition model
# trained on VoxCeleb -- confirm against the model card.
spk_model_name = "speechbrain/spkrec-xvect-voxceleb"

# Use the GPU when PyTorch reports CUDA as available, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# The encoder is built once, at import time, and shared by every call to
# ``create_speaker_embedding``. Its files are stored under
# ``/tmp/<spk_model_name>``.
speaker_model = EncoderClassifier.from_hparams(
    source=spk_model_name,
    savedir=os.path.join("/tmp", spk_model_name),
    run_opts={"device": device},
)
def create_speaker_embedding(waveform):
    """Return a normalized speaker embedding for *waveform*.

    The waveform is passed to the module-level ``speaker_model`` via
    ``encode_batch`` and the result is normalized along its last dimension
    with ``torch.nn.functional.normalize`` (L2 norm by default).

    Args:
        waveform: Audio input forwarded unchanged to
            ``speaker_model.encode_batch``.
            NOTE(review): presumably a float tensor shaped (batch, time) --
            confirm against the caller.

    Returns:
        The tensor produced by ``encode_batch`` after normalization over its
        last dimension. No squeezing or device transfer is applied here.
    """
    # Gradient tracking is disabled: this is an inference-only code path.
    with torch.no_grad():
        raw_embeddings = speaker_model.encode_batch(waveform)
        return torch.nn.functional.normalize(raw_embeddings, dim=-1)