from typing import Any, Optional, Union

import numpy as np
import pyworld

from .f0 import F0Predictor


class Dio(F0Predictor):
    """F0 predictor that runs pyworld's DIO estimator refined by StoneMask."""

    def __init__(self, hop_length=512, f0_min=50, f0_max=1100, sampling_rate=44100):
        """Forward the analysis settings to ``F0Predictor``.

        Args:
            hop_length: Hop between analysis frames, in samples.
            f0_min: Passed to DIO as ``f0_floor``.
            f0_max: Passed to DIO as ``f0_ceil``.
            sampling_rate: Passed to DIO / StoneMask as the sample rate.
        """
        super().__init__(hop_length, f0_min, f0_max, sampling_rate)

    def compute_f0(
        self,
        wav: np.ndarray,
        p_len: Optional[int] = None,
        filter_radius: Optional[Union[int, float]] = None,
    ):
        """Estimate the F0 contour of ``wav``.

        Args:
            wav: Waveform array; it is converted to ``np.double`` before
                being handed to pyworld.
            p_len: Length the contour is resized to. Defaults to
                ``wav.shape[0] // self.hop_length``.
            filter_radius: Accepted for interface compatibility; this
                implementation does not read it.

        Returns:
            The first element of what ``self._interpolate_f0`` returns for the
            resized contour (presumably the interpolated F0 array - confirm
            against ``F0Predictor``).
        """
        if p_len is None:
            p_len = wav.shape[0] // self.hop_length

        # Convert once: both DIO and StoneMask take the same double-precision
        # signal, so there is no need to copy the waveform twice.
        signal = wav.astype(np.double)

        f0, t = pyworld.dio(
            signal,
            fs=self.sampling_rate,
            f0_floor=self.f0_min,
            f0_ceil=self.f0_max,
            # Hop expressed in milliseconds (samples / rate * 1000).
            frame_period=1000 * self.hop_length / self.sampling_rate,
        )
        f0 = pyworld.stonemask(signal, f0, t, self.sampling_rate)

        # Vectorised rounding to one decimal place; replaces a per-element
        # Python loop that called round() on every frame.
        f0 = np.round(f0, 1)

        return self._interpolate_f0(self._resize_f0(f0, p_len))[0]