File size: 2,388 Bytes
8c92a11 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 |
# Copyright (c) 2023 Amphion.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import torch
import librosa
from utils.util import JsonHParams
from utils.f0 import get_f0_features_using_parselmouth, get_pitch_sub_median
from utils.mel import extract_mel_features
def extract_spr(
    audio,
    fs=None,
    hop_length=256,
    win_length=1024,
    n_fft=1024,
    n_mels=128,
    f0_min=37,
    f0_max=1000,
    pitch_bin=256,
    pitch_max=1100.0,
    pitch_min=50.0,
):
    """Compute the Singing Power Ratio (SPR) of an audio file.

    Two mel spectrograms are extracted from the same signal, one limited to
    2000-4000 Hz and one limited to 0-2000 Hz. For every frame whose f0 is
    greater than 1, the peak of the 2000-4000 Hz band is subtracted from the
    peak of the 0-2000 Hz band; the result is the mean of those per-frame
    differences.

    Args:
        audio: path to the audio (passed to ``librosa.load``).
        fs: sampling rate. When ``None``, ``librosa.load`` is called without
            ``sr`` and the rate it returns is used.
        hop_length: hop length.
        win_length: window length.
        n_fft: FFT size.
        n_mels: number of mel filters.
        f0_min: lower limit for f0.
        f0_max: upper limit for f0.
        pitch_bin: number of bins for f0 quantization.
        pitch_max: upper limit for f0 quantization.
        pitch_min: lower limit for f0 quantization.

    Returns:
        The mean per-frame difference (low-band peak minus high-band peak)
        over the frames with f0 > 1, or ``False`` when no such frame exists.
        NOTE(review): the unit of the result depends on the scale produced by
        ``extract_mel_features`` (presumably log-mel) -- confirm.
    """
    # Load audio
    if fs is not None:
        audio, _ = librosa.load(audio, sr=fs)
    else:
        audio, fs = librosa.load(audio)
    audio = torch.from_numpy(audio)

    # Initialize config
    cfg = JsonHParams()
    cfg.sample_rate = fs
    cfg.hop_size = hop_length
    cfg.win_size = win_length
    cfg.n_fft = n_fft
    cfg.n_mel = n_mels
    cfg.f0_min = f0_min
    cfg.f0_max = f0_max
    cfg.pitch_bin = pitch_bin
    cfg.pitch_max = pitch_max
    cfg.pitch_min = pitch_min

    # Extract mel spectrograms. The same cfg object is reused and only its
    # frequency band is changed between the two calls.
    cfg.fmin = 2000
    cfg.fmax = 4000
    mel1 = extract_mel_features(
        y=audio.unsqueeze(0),
        cfg=cfg,
    ).squeeze(0)

    cfg.fmin = 0
    cfg.fmax = 2000
    mel2 = extract_mel_features(
        y=audio.unsqueeze(0),
        cfg=cfg,
    ).squeeze(0)

    f0 = get_f0_features_using_parselmouth(
        audio,
        cfg,
    )

    # Length alignment: truncate everything to the shortest frame count so
    # f0 and both spectrograms can be walked in lockstep.
    length = min(len(f0), mel1.shape[-1], mel2.shape[-1])
    f0 = f0[:length]
    mel1 = mel1[:, :length]
    mel2 = mel2[:, :length]

    # Per-frame peak over the mel axis, computed once for all frames instead
    # of calling Python's max() on every frame inside the loop.
    peak_high = mel1.max(dim=0).values.numpy()
    peak_low = mel2.max(dim=0).values.numpy()

    # Compute SPR over frames with f0 > 1 only
    res = []
    for pitch, low, high in zip(f0, peak_low, peak_high):
        if pitch <= 1:
            continue
        res.append(low - high)

    if len(res) == 0:
        # No frame passed the f0 test; keep the original sentinel value.
        return False
    return sum(res) / len(res)
|