Spaces:
Running
Running
MilesCranmer
committed on
Commit
•
ad84a1c
1
Parent(s):
69aa240
Add more helpful warnings
Browse files
- pysr/sr.py +15 -1
- test/test.py +28 -2
pysr/sr.py
CHANGED
@@ -691,7 +691,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin):
|
|
691 |
|
692 |
if maxsize > 40:
|
693 |
warnings.warn(
|
694 |
-
"Note: Using a large maxsize for the equation search will be exponentially slower and use significant memory.
|
695 |
)
|
696 |
elif maxsize < 7:
|
697 |
raise NotImplementedError("PySR requires a maxsize of at least 7")
|
@@ -1147,6 +1147,20 @@ class PySRRegressor(BaseEstimator, RegressorMixin):
|
|
1147 |
"Note: you are running with more than 10,000 datapoints. You should consider turning on batching (https://astroautomata.com/PySR/#/options?id=batching). You should also reconsider if you need that many datapoints. Unless you have a large amount of noise (in which case you should smooth your dataset first), generally < 10,000 datapoints is enough to find a functional form with symbolic regression. More datapoints will lower the search speed."
|
1148 |
)
|
1149 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1150 |
X, selection = _handle_feature_selection(
|
1151 |
X, select_k_features, y, variable_names
|
1152 |
)
|
|
|
691 |
|
692 |
if maxsize > 40:
|
693 |
warnings.warn(
|
694 |
+
"Note: Using a large maxsize for the equation search will be exponentially slower and use significant memory."
|
695 |
)
|
696 |
elif maxsize < 7:
|
697 |
raise NotImplementedError("PySR requires a maxsize of at least 7")
|
|
|
1147 |
"Note: you are running with more than 10,000 datapoints. You should consider turning on batching (https://astroautomata.com/PySR/#/options?id=batching). You should also reconsider if you need that many datapoints. Unless you have a large amount of noise (in which case you should smooth your dataset first), generally < 10,000 datapoints is enough to find a functional form with symbolic regression. More datapoints will lower the search speed."
|
1148 |
)
|
1149 |
|
1150 |
+
if self.n_features >= 10 and not select_k_features:
|
1151 |
+
warnings.warn(
|
1152 |
+
"Note: you are running with 10 features or more. "
|
1153 |
+
"Genetic algorithms like used in PySR scale poorly with large numbers of features. "
|
1154 |
+
"Consider using feature selection techniques to select the most important features "
|
1155 |
+
"(you can do this automatically with the `select_k_features` parameter), "
|
1156 |
+
"or, alternatively, doing a dimensionality reduction beforehand. "
|
1157 |
+
"For example, `X = PCA(n_components=6).fit_transform(X)`, "
|
1158 |
+
"using scikit-learn's `PCA` class, "
|
1159 |
+
"will reduce the number of features to 6 in an interpretable way, "
|
1160 |
+
"as each resultant feature "
|
1161 |
+
"will be a linear combination of the original features. "
|
1162 |
+
)
|
1163 |
+
|
1164 |
X, selection = _handle_feature_selection(
|
1165 |
X, select_k_features, y, variable_names
|
1166 |
)
|
test/test.py
CHANGED
@@ -7,6 +7,7 @@ from pysr.sr import run_feature_selection, _handle_feature_selection
|
|
7 |
import sympy
|
8 |
from sympy import lambdify
|
9 |
import pandas as pd
|
|
|
10 |
|
11 |
|
12 |
class TestPipeline(unittest.TestCase):
|
@@ -275,11 +276,36 @@ class TestMiscellaneous(unittest.TestCase):
|
|
275 |
"""Test miscellaneous functions."""
|
276 |
|
277 |
def test_deprecation(self):
|
278 |
-
|
279 |
-
|
|
|
|
|
280 |
with self.assertWarns(UserWarning):
|
281 |
model = PySRRegressor(fractionReplaced=0.2)
|
282 |
# This is a deprecated parameter, so we should get a warning.
|
283 |
|
284 |
# The correct value should be set:
|
285 |
self.assertEqual(model.params["fraction_replaced"], 0.2)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
import sympy
|
8 |
from sympy import lambdify
|
9 |
import pandas as pd
|
10 |
+
import warnings
|
11 |
|
12 |
|
13 |
class TestPipeline(unittest.TestCase):
|
|
|
276 |
"""Test miscellaneous functions."""
|
277 |
|
278 |
def test_deprecation(self):
|
279 |
+
"""Ensure that deprecation works as expected.
|
280 |
+
|
281 |
+
This should give a warning, and sets the correct value.
|
282 |
+
"""
|
283 |
with self.assertWarns(UserWarning):
|
284 |
model = PySRRegressor(fractionReplaced=0.2)
|
285 |
# This is a deprecated parameter, so we should get a warning.
|
286 |
|
287 |
# The correct value should be set:
|
288 |
self.assertEqual(model.params["fraction_replaced"], 0.2)
|
289 |
+
|
290 |
+
def test_size_warning(self):
|
291 |
+
"""Ensure that a warning is given for a large input size."""
|
292 |
+
model = PySRRegressor()
|
293 |
+
X = np.random.randn(10001, 2)
|
294 |
+
y = np.random.randn(10001)
|
295 |
+
with warnings.catch_warnings():
|
296 |
+
warnings.simplefilter("error")
|
297 |
+
with self.assertRaises(Exception) as context:
|
298 |
+
model.fit(X, y)
|
299 |
+
self.assertIn("more than 10,000", str(context.exception))
|
300 |
+
|
301 |
+
def test_feature_warning(self):
|
302 |
+
"""Ensure that a warning is given for large number of features."""
|
303 |
+
model = PySRRegressor()
|
304 |
+
X = np.random.randn(100, 10)
|
305 |
+
y = np.random.randn(100)
|
306 |
+
with warnings.catch_warnings():
|
307 |
+
warnings.simplefilter("error")
|
308 |
+
with self.assertRaises(Exception) as context:
|
309 |
+
model.fit(X, y)
|
310 |
+
self.assertIn("with 10 features or more", str(context.exception))
|
311 |
+
|