|
from typing import Any, Optional
|
|
|
|
import numpy as np
|
|
import parselmouth
|
|
|
|
from .f0 import F0Predictor
|
|
|
|
|
|
class PM(F0Predictor):
    """F0 predictor that delegates pitch tracking to ``parselmouth``.

    Pitch is extracted with ``parselmouth.Sound.to_pitch_ac`` and the
    resulting frequency track is post-processed by the inherited
    ``_interpolate_f0`` helper.
    """

    def __init__(self, hop_length=512, f0_min=50, f0_max=1100, sampling_rate=44100):
        """Forward the analysis settings, unchanged, to ``F0Predictor``."""
        super().__init__(hop_length, f0_min, f0_max, sampling_rate)

    def compute_f0(
        self,
        wav: np.ndarray,
        p_len: Optional[int] = None,
        filter_radius: Optional[int] = None,
    ):
        """Extract an F0 track from ``wav``.

        Args:
            wav: Samples handed to ``parselmouth.Sound`` together with
                ``self.sampling_rate``.
            p_len: Requested number of frames. When ``None`` it defaults to
                ``wav.shape[0] // self.hop_length``. When given, it must
                differ from that value by less than 4.
            filter_radius: Accepted for signature compatibility; this
                method does not read it.

        Returns:
            The first element of ``self._interpolate_f0(f0)``, where ``f0``
            is the ``"frequency"`` track, zero-padded up to ``p_len`` frames
            when the tracker produced fewer. A longer track is not trimmed.

        Raises:
            AssertionError: If an explicit ``p_len`` is 4 or more frames
                away from ``wav.shape[0] // self.hop_length``. Being an
                ``assert``, the check is skipped when Python runs with
                ``-O``.
        """
        # NOTE(review): assumes F0Predictor.__init__ stores hop_length,
        # f0_min, f0_max and sampling_rate as same-named attributes - confirm.
        if p_len is not None:
            assert abs(p_len - wav.shape[0] // self.hop_length) < 4, "pad length error"
        else:
            p_len = wav.shape[0] // self.hop_length

        # Hop duration in milliseconds; scaled back by 1000 for ``time_step``.
        hop_ms = self.hop_length / self.sampling_rate * 1000

        sound = parselmouth.Sound(wav, self.sampling_rate)
        pitch = sound.to_pitch_ac(
            time_step=hop_ms / 1000,
            voicing_threshold=0.6,
            pitch_floor=self.f0_min,
            pitch_ceiling=self.f0_max,
        )
        f0 = pitch.selected_array["frequency"]

        # Split the frame shortfall between both ends; the leading side gets
        # the extra frame when the shortfall is odd.
        shortfall = p_len - len(f0)
        lead = (shortfall + 1) // 2
        trail = shortfall - lead
        if lead > 0 or trail > 0:
            f0 = np.pad(f0, [[lead, trail]], mode="constant")

        interpolated = self._interpolate_f0(f0)
        return interpolated[0]
|
|
|