from typing import Optional, Union

import numpy as np
import torch

from .f0 import F0Predictor


class FCPE(F0Predictor):
    """F0 predictor backed by the bundled ``torchfcpe`` inference model.

    The constructor forwards its configuration to ``F0Predictor`` and then
    spawns the bundled model on ``self.device``.
    """

    # Fallback used when ``compute_f0`` is called with ``filter_radius=None``;
    # kept equal to the default in the method signature.
    _DEFAULT_THRESHOLD = 0.006

    def __init__(
        self,
        hop_length=512,
        f0_min=50,
        f0_max=1100,
        sampling_rate=44100,
        device="cpu",
    ):
        """Initialise the base predictor and load the bundled FCPE model.

        All arguments are passed positionally, in this order, to
        ``F0Predictor.__init__``.
        """
        super().__init__(
            hop_length,
            f0_min,
            f0_max,
            sampling_rate,
            device,
        )

        # Must be imported here rather than at module level; a module-level
        # import causes fairseq to crash during training.
        from torchfcpe import spawn_bundled_infer_model

        self.model = spawn_bundled_infer_model(self.device)

    def compute_f0(
        self,
        wav: np.ndarray,
        p_len: Optional[int] = None,
        filter_radius: Optional[Union[int, float]] = 0.006,
    ):
        """Estimate F0 for ``wav`` with the FCPE model.

        Args:
            wav: Audio samples as a NumPy array or a torch tensor. Its first
                dimension is used as the sample count when ``p_len`` is not
                given.
            p_len: Target length handed to ``self._resize_f0``. Defaults to
                ``wav.shape[0] // self.hop_length``.
            filter_radius: Forwarded to the model as its ``threshold``
                argument (the name is kept for interface compatibility).
                ``None`` selects the default of 0.006.

        Returns:
            Element ``[0]`` of ``self._interpolate_f0(...)`` applied to the
            resized F0 curve. NOTE(review): both helpers come from outside
            this class (presumably ``F0Predictor``) -- confirm their
            contracts there.
        """
        if p_len is None:
            p_len = wav.shape[0] // self.hop_length

        # ``filter_radius`` is declared Optional, but the model is handed a
        # numeric threshold; substitute the default instead of passing None.
        threshold = (
            self._DEFAULT_THRESHOLD if filter_radius is None else filter_radius
        )

        if not torch.is_tensor(wav):
            wav = torch.from_numpy(wav)

        # Add a leading batch dimension before inference.
        batch = wav.float().to(self.device).unsqueeze(0)
        prediction = self.model.infer(
            batch,
            sr=self.sampling_rate,
            decoder_mode="local_argmax",
            threshold=threshold,
        )
        # Drop singleton dimensions and move the result to host memory as a
        # NumPy array for the post-processing helpers.
        f0 = prediction.squeeze().cpu().numpy()

        return self._interpolate_f0(self._resize_f0(f0, p_len))[0]