Spaces:
Running
Running
File size: 1,374 Bytes
12e6d5e 75c23d4 fd4c500 12e6d5e fd4c500 12e6d5e fd4c500 12e6d5e fd4c500 12e6d5e fd4c500 12e6d5e fd4c500 12e6d5e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
"""Functions for denoising data during preprocessing."""
from typing import Optional, Tuple, cast
import numpy as np
from numpy import ndarray
def denoise(
    X: ndarray,
    y: ndarray,
    Xresampled: Optional[ndarray] = None,
    random_state: Optional[np.random.RandomState] = None,
) -> Tuple[ndarray, ndarray]:
    """Smooth `y` by fitting a Gaussian process on `(X, y)` and predicting.

    Args:
        X: Training inputs; `X.shape[1]` sets the number of RBF length
            scales, so a 2-D array is required.
        y: Training targets passed to the regressor's `fit` together with `X`.
        Xresampled: Optional inputs to predict at instead of `X`.
        random_state: Forwarded to `GaussianProcessRegressor`.

    Returns:
        A pair `(inputs, predictions)` where `inputs` is `Xresampled` when
        it is given and `X` otherwise, and `predictions` is the fitted
        regressor's `predict` output at those inputs.
    """
    # Imported lazily so scikit-learn is only needed when denoising is used.
    from sklearn.gaussian_process import GaussianProcessRegressor
    from sklearn.gaussian_process.kernels import RBF, ConstantKernel, WhiteKernel

    # One RBF length scale per input column, plus a white-noise term and a
    # constant offset. The summand order is kept as-is because it determines
    # the layout of the kernel's hyperparameters.
    length_scales = np.ones(X.shape[1])
    kernel = RBF(length_scales) + WhiteKernel(1e-1) + ConstantKernel()

    model = GaussianProcessRegressor(
        kernel=kernel,
        n_restarts_optimizer=50,
        random_state=random_state,
    )
    model.fit(X, y)

    # Predict on the resampled inputs when provided, else on the training set.
    targets = X if Xresampled is None else Xresampled
    return targets, cast(ndarray, model.predict(targets))
def multi_denoise(
    X: ndarray,
    y: ndarray,
    Xresampled: Optional[ndarray] = None,
    random_state: Optional[np.random.RandomState] = None,
) -> Tuple[ndarray, ndarray]:
    """Perform `denoise` along each column of `y` independently.

    Args:
        X: Training inputs, passed unchanged to every `denoise` call.
        y: 2-D array of targets; each column `y[:, i]` is denoised on its
            own. A 1-D `y` is not supported (`y.shape[1]` raises
            `IndexError`).
        Xresampled: Optional inputs to predict at instead of `X`.
        random_state: Forwarded to every `denoise` call; the same object is
            reused for each column.

    Returns:
        A pair `(inputs, denoised)` where `inputs` is `Xresampled` when it
        is given and `X` otherwise, and `denoised` stacks the per-column
        predictions along axis 1. When `y` has no columns, `denoised` is an
        empty float array of shape `(len(inputs), 0)`.
    """
    inputs = X if Xresampled is None else Xresampled
    n_targets = y.shape[1]

    if n_targets == 0:
        # np.stack raises ValueError on an empty sequence, so a target
        # matrix without columns is answered with a matching empty result.
        return inputs, np.empty((inputs.shape[0], 0), dtype=np.float64)

    denoised = np.stack(
        [
            denoise(X, y[:, i], Xresampled=Xresampled, random_state=random_state)[1]
            for i in range(n_targets)
        ],
        axis=1,
    )
    return inputs, denoised
|