|
from typing import Any, Optional, Union
|
|
|
|
import numpy as np
|
|
import pyworld
|
|
|
|
from .f0 import F0Predictor
|
|
|
|
|
|
class Dio(F0Predictor):
|
|
def __init__(self, hop_length=512, f0_min=50, f0_max=1100, sampling_rate=44100):
|
|
super().__init__(hop_length, f0_min, f0_max, sampling_rate)
|
|
|
|
def compute_f0(
|
|
self,
|
|
wav: np.ndarray,
|
|
p_len: Optional[int] = None,
|
|
filter_radius: Optional[Union[int, float]] = None,
|
|
):
|
|
if p_len is None:
|
|
p_len = wav.shape[0] // self.hop_length
|
|
f0, t = pyworld.dio(
|
|
wav.astype(np.double),
|
|
fs=self.sampling_rate,
|
|
f0_floor=self.f0_min,
|
|
f0_ceil=self.f0_max,
|
|
frame_period=1000 * self.hop_length / self.sampling_rate,
|
|
)
|
|
f0 = pyworld.stonemask(wav.astype(np.double), f0, t, self.sampling_rate)
|
|
for index, pitch in enumerate(f0):
|
|
f0[index] = round(pitch, 1)
|
|
return self._interpolate_f0(self._resize_f0(f0, p_len))[0]
|
|
|