Spaces:
Running
Running
MilesCranmer
committed on
Commit
•
5750d1a
1
Parent(s):
fdb138f
Add denoising operation with test
Browse files
- pysr/sr.py +37 -0
- test/test.py +17 -0
pysr/sr.py
CHANGED
@@ -130,6 +130,8 @@ def pysr(
|
|
130 |
optimizer_iterations=10,
|
131 |
tournament_selection_n=10,
|
132 |
tournament_selection_p=1.0,
|
|
|
|
|
133 |
):
|
134 |
"""Run symbolic regression to fit f(X[i, :]) ~ y[i] for all i.
|
135 |
Note: most default parameters have been tuned over several example
|
@@ -244,6 +246,8 @@ def pysr(
|
|
244 |
:type tournament_selection_n: int
|
245 |
:param tournament_selection_p: Probability of selecting the best expression in each tournament. The probability will decay as p*(1-p)^n for other expressions, sorted by loss.
|
246 |
:type tournament_selection_p: float
|
|
|
|
|
247 |
:returns: Results dataframe, giving complexity, MSE, and equations (as strings), as well as functional forms. If list, each element corresponds to a dataframe of equations for each output.
|
248 |
:type: pd.DataFrame/list
|
249 |
"""
|
@@ -327,6 +331,24 @@ def pysr(
|
|
327 |
else:
|
328 |
raise NotImplementedError("y shape not supported!")
|
329 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
330 |
kwargs = dict(
|
331 |
X=X,
|
332 |
y=y,
|
@@ -387,6 +409,7 @@ def pysr(
|
|
387 |
nout=nout,
|
388 |
tournament_selection_n=tournament_selection_n,
|
389 |
tournament_selection_p=tournament_selection_p,
|
|
|
390 |
)
|
391 |
|
392 |
kwargs = {**_set_paths(tempdir), **kwargs}
|
@@ -1082,6 +1105,20 @@ def _yesno(question):
|
|
1082 |
return False
|
1083 |
|
1084 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1085 |
class CallableEquation(object):
|
1086 |
"""Simple wrapper for numpy lambda functions built with sympy"""
|
1087 |
|
|
|
130 |
optimizer_iterations=10,
|
131 |
tournament_selection_n=10,
|
132 |
tournament_selection_p=1.0,
|
133 |
+
denoise=False,
|
134 |
+
Xresampled=None,
|
135 |
):
|
136 |
"""Run symbolic regression to fit f(X[i, :]) ~ y[i] for all i.
|
137 |
Note: most default parameters have been tuned over several example
|
|
|
246 |
:type tournament_selection_n: int
|
247 |
:param tournament_selection_p: Probability of selecting the best expression in each tournament. The probability will decay as p*(1-p)^n for other expressions, sorted by loss.
|
248 |
:type tournament_selection_p: float
|
249 |
+
:param denoise: Whether to use a Gaussian Process to denoise the data before inputting to PySR. Can help PySR fit noisy data.
|
250 |
+
:type denoise: bool
|
251 |
:returns: Results dataframe, giving complexity, MSE, and equations (as strings), as well as functional forms. If list, each element corresponds to a dataframe of equations for each output.
|
252 |
:type: pd.DataFrame/list
|
253 |
"""
|
|
|
331 |
else:
|
332 |
raise NotImplementedError("y shape not supported!")
|
333 |
|
334 |
+
if denoise:
|
335 |
+
if weights is not None:
|
336 |
+
raise NotImplementedError(
|
337 |
+
"No weights for denoising - the weights are learned."
|
338 |
+
)
|
339 |
+
if Xresampled is not None and selection is not None:
|
340 |
+
# Select among only the selected features:
|
341 |
+
Xresampled = Xresampled[:, selection]
|
342 |
+
if multioutput:
|
343 |
+
y = np.stack(
|
344 |
+
[_denoise(X, y[:, i], Xresampled=Xresampled)[1] for i in range(nout)],
|
345 |
+
axis=1,
|
346 |
+
)
|
347 |
+
if Xresampled is not None:
|
348 |
+
X = Xresampled
|
349 |
+
else:
|
350 |
+
X, y = _denoise(X, y, Xresampled=Xresampled)
|
351 |
+
|
352 |
kwargs = dict(
|
353 |
X=X,
|
354 |
y=y,
|
|
|
409 |
nout=nout,
|
410 |
tournament_selection_n=tournament_selection_n,
|
411 |
tournament_selection_p=tournament_selection_p,
|
412 |
+
denoise=denoise,
|
413 |
)
|
414 |
|
415 |
kwargs = {**_set_paths(tempdir), **kwargs}
|
|
|
1105 |
return False
|
1106 |
|
1107 |
|
1108 |
+
def _denoise(X, y, Xresampled=None):
|
1109 |
+
"""Denoise the dataset using a Gaussian process"""
|
1110 |
+
from sklearn.gaussian_process import GaussianProcessRegressor
|
1111 |
+
from sklearn.gaussian_process.kernels import RBF, WhiteKernel, ConstantKernel
|
1112 |
+
|
1113 |
+
gp_kernel = RBF(np.ones(X.shape[1])) + WhiteKernel(1e-1) + ConstantKernel()
|
1114 |
+
gpr = GaussianProcessRegressor(kernel=gp_kernel, n_restarts_optimizer=50)
|
1115 |
+
gpr.fit(X, y)
|
1116 |
+
if Xresampled is not None:
|
1117 |
+
return Xresampled, gpr.predict(Xresampled)
|
1118 |
+
|
1119 |
+
return X, gpr.predict(X)
|
1120 |
+
|
1121 |
+
|
1122 |
class CallableEquation(object):
|
1123 |
"""Simple wrapper for numpy lambda functions built with sympy"""
|
1124 |
|
test/test.py
CHANGED
@@ -82,6 +82,23 @@ class TestPipeline(unittest.TestCase):
|
|
82 |
|
83 |
self.assertLessEqual(equations.iloc[-1]["MSE"], 1e-4)
|
84 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
85 |
|
86 |
class TestBest(unittest.TestCase):
|
87 |
def setUp(self):
|
|
|
82 |
|
83 |
self.assertLessEqual(equations.iloc[-1]["MSE"], 1e-4)
|
84 |
|
85 |
+
def test_noisy(self):
|
86 |
+
|
87 |
+
np.random.seed(1)
|
88 |
+
y = self.X[:, [0, 1]] ** 2 + np.random.randn(self.X.shape[0]) * 0.05
|
89 |
+
equations = pysr(
|
90 |
+
self.X,
|
91 |
+
y,
|
92 |
+
unary_operators=["sq(x) = x^2"],
|
93 |
+
binary_operators=["plus"],
|
94 |
+
extra_sympy_mappings={"sq": lambda x: x ** 2},
|
95 |
+
**self.default_test_kwargs,
|
96 |
+
procs=0,
|
97 |
+
denoise=True,
|
98 |
+
)
|
99 |
+
self.assertLessEqual(best_row(equations=equations)[0]["MSE"], 1e-4)
|
100 |
+
self.assertLessEqual(best_row(equations=equations)[1]["MSE"], 1e-4)
|
101 |
+
|
102 |
|
103 |
class TestBest(unittest.TestCase):
|
104 |
def setUp(self):
|