from typing import Any, Optional, Union | |
import numpy as np | |
import torch | |
import torchcrepe | |
from .f0 import F0Predictor | |
class CRePE(F0Predictor):
    """F0 predictor that delegates pitch estimation to ``torchcrepe``."""

    def __init__(
        self,
        hop_length=512,
        f0_min=50,
        f0_max=1100,
        sampling_rate=44100,
        device="cpu",
    ):
        """Forward the configuration to ``F0Predictor``.

        Any device whose string form contains ``"privateuseone"`` is
        replaced by ``"cpu"`` before being handed to the base class.
        NOTE(review): presumably because torchcrepe cannot run on that
        backend -- confirm.
        """
        device = "cpu" if "privateuseone" in str(device) else device
        super().__init__(hop_length, f0_min, f0_max, sampling_rate, device)

    def compute_f0(
        self,
        wav: np.ndarray,
        p_len: Optional[int] = None,
        filter_radius: Optional[Union[int, float]] = None,
    ):
        """Estimate the F0 contour of ``wav`` with torchcrepe.

        Args:
            wav: Audio samples as a NumPy array or a torch tensor; its
                first dimension is used as the sample count.
            p_len: Number of frames to resize the contour to. When
                ``None`` it is ``wav.shape[0] // self.hop_length``.
            filter_radius: Accepted but not used by this predictor.

        Returns:
            The first element of the result of ``self._interpolate_f0``
            applied to the resized contour.
        """
        target_frames = (
            wav.shape[0] // self.hop_length if p_len is None else p_len
        )

        samples = wav if isinstance(wav, torch.Tensor) else torch.from_numpy(wav)
        # torchcrepe receives float audio with a leading batch dimension.
        audio = samples.float().to(self.device).unsqueeze(dim=0)

        # Pick a batch size that doesn't cause memory errors on your gpu.
        frames_per_batch = 512
        smoothing_window = 3
        periodicity_floor = 0.1

        # Run pitch estimation on self.device and also request periodicity.
        pitch, periodicity = torchcrepe.predict(
            audio,
            self.sampling_rate,
            self.hop_length,
            self.f0_min,
            self.f0_max,
            batch_size=frames_per_batch,
            device=self.device,
            return_periodicity=True,
        )

        # Smooth both tracks, then zero the frames whose periodicity falls
        # below the floor.
        periodicity = torchcrepe.filter.median(periodicity, smoothing_window)
        pitch = torchcrepe.filter.mean(pitch, smoothing_window)
        low_periodicity = periodicity < periodicity_floor
        pitch[low_periodicity] = 0

        # Drop the batch dimension and hand a NumPy array to the helpers.
        contour = pitch[0].cpu().numpy()
        resized = self._resize_f0(contour, target_frames)
        return self._interpolate_f0(resized)[0]