Spaces:
Build error
Build error
Commit
·
1fe6b04
1
Parent(s):
e06adea
Upload ProsodicConditionExtractor.py
Browse files
ProsodicConditionExtractor.py
ADDED
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import soundfile as sf
|
2 |
+
import torch
|
3 |
+
import torch.multiprocessing
|
4 |
+
import torch.multiprocessing
|
5 |
+
from numpy import trim_zeros
|
6 |
+
from speechbrain.pretrained import EncoderClassifier
|
7 |
+
|
8 |
+
from Preprocessing.AudioPreprocessor import AudioPreprocessor
|
9 |
+
|
10 |
+
|
11 |
+
class ProsodicConditionExtractor:
    """
    Turn a reference recording into a single conditioning vector.

    Two pretrained SpeechBrain encoders (the ``spkrec-ecapa-voxceleb`` and
    ``spkrec-xvect-voxceleb`` checkpoints) are loaded once at construction
    time. For each reference wave, the embeddings produced by both encoders
    are concatenated into one vector.
    """

    def __init__(self, sr, device=torch.device("cpu")):
        """
        Set up the audio preprocessor and load both embedding models.

        Args:
            sr: Sampling rate of the audio that will be passed in. The
                preprocessor is configured with an output rate of 16000.
            device: Device handed to both encoders through ``run_opts``.
        """
        self.ap = AudioPreprocessor(input_sr=sr, output_sr=16000, melspec_buckets=80, hop_length=256, n_fft=1024, cut_silence=False)
        # https://huggingface.co/speechbrain/spkrec-ecapa-voxceleb
        self.speaker_embedding_func_ecapa = EncoderClassifier.from_hparams(source="speechbrain/spkrec-ecapa-voxceleb",
                                                                           run_opts={"device": str(device)},
                                                                           savedir="Models/SpeakerEmbedding/speechbrain_speaker_embedding_ecapa")
        # https://huggingface.co/speechbrain/spkrec-xvect-voxceleb
        self.speaker_embedding_func_xvector = EncoderClassifier.from_hparams(source="speechbrain/spkrec-xvect-voxceleb",
                                                                             run_opts={"device": str(device)},
                                                                             savedir="Models/SpeakerEmbedding/speechbrain_speaker_embedding_xvector")

    def extract_condition_from_reference_wave(self, wave, already_normalized=False):
        """
        Compute the combined embedding for one reference utterance.

        Args:
            wave: The reference audio. When ``already_normalized`` is False it
                is handed to the preprocessor's ``audio_to_wave_tensor``;
                otherwise it is used as is and must already be a 1-D tensor.
            already_normalized: Skip the preprocessing step when True.

        Returns:
            A 1-D CPU tensor holding the ECAPA embedding followed by the
            x-vector embedding.

        Raises:
            ValueError: If there are no samples left to embed (for example
                because the wave consists only of zeros).
        """
        if already_normalized:
            norm_wave = wave
        else:
            norm_wave = self.ap.audio_to_wave_tensor(normalize=True, audio=wave)
            # NOTE(review): the extracted source lost its indentation, so it is
            # not certain whether the zero-trimming belongs to this branch only
            # or to both branches -- confirm against the upstream file.
            # detach().cpu() keeps the numpy conversion from failing on tensors
            # that track gradients or live on another device.
            norm_wave = torch.tensor(trim_zeros(norm_wave.detach().cpu().numpy()))

        if norm_wave.numel() == 0:
            # Fail early with a readable message instead of an obscure error
            # from deep inside the encoders.
            raise ValueError("Cannot extract a condition from an empty reference wave "
                             "(no samples remain after trimming zeros).")

        # Both encoders expect a batch dimension; the batch holds one utterance.
        batch = norm_wave.unsqueeze(0)
        spk_emb_ecapa = self.speaker_embedding_func_ecapa.encode_batch(wavs=batch).squeeze()
        spk_emb_xvector = self.speaker_embedding_func_xvector.encode_batch(wavs=batch).squeeze()
        combined_utt_condition = torch.cat([spk_emb_ecapa.cpu(),
                                            spk_emb_xvector.cpu()], dim=0)
        return combined_utt_condition
|
35 |
+
|
36 |
+
|
37 |
+
if __name__ == '__main__':
    # Manual smoke test: load one reference recording, build an extractor for
    # its sampling rate and print the shape of the resulting condition vector.
    reference_audio, sample_rate = sf.read("../audios/1.wav")
    extractor = ProsodicConditionExtractor(sr=sample_rate)
    condition = extractor.extract_condition_from_reference_wave(wave=reference_audio)
    print(condition.shape)
|