Spaces:
Running
Running
MilesCranmer
committed on
Commit
·
8b29fef
1
Parent(s):
21ae49d
Hide other internal functions
Browse files
- pysr/sr.py +5 -5
pysr/sr.py
CHANGED
@@ -192,7 +192,7 @@ def pysr(X=None, y=None, weights=None,
|
|
192 |
(as strings).
|
193 |
|
194 |
"""
|
195 |
-
raise_depreciation_errors(limitPowComplexity, threads)
|
196 |
|
197 |
if isinstance(X, pd.DataFrame):
|
198 |
variable_names = list(X.columns)
|
@@ -210,7 +210,7 @@ def pysr(X=None, y=None, weights=None,
|
|
210 |
if len(X) > 10000 and not batching:
|
211 |
warnings.warn("Note: you are running with more than 10,000 datapoints. You should consider turning on batching (https://pysr.readthedocs.io/en/latest/docs/options/#batching). You should also reconsider if you need that many datapoints. Unless you have a large amount of noise (in which case you should smooth your dataset first), generally < 10,000 datapoints is enough to find a functional form with symbolic regression. More datapoints will lower the search speed.")
|
212 |
|
213 |
-
X, variable_names = handle_feature_selection(
|
214 |
X, select_k_features,
|
215 |
use_custom_variable_names, variable_names, y
|
216 |
)
|
@@ -516,7 +516,7 @@ def _using_test_input(X, test, y):
|
|
516 |
return X, y
|
517 |
|
518 |
|
519 |
-
def handle_feature_selection(X, select_k_features, use_custom_variable_names, variable_names, y):
|
520 |
if select_k_features is not None:
|
521 |
selection = run_feature_selection(X, y, select_k_features)
|
522 |
print(f"Using features {selection}")
|
@@ -562,7 +562,7 @@ def _set_paths(tempdir):
|
|
562 |
weights_filename=weights_filename, y_filename=y_filename)
|
563 |
|
564 |
|
565 |
-
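def check_assertions(X, binary_operators, unary_operators, use_custom_variable_names, variable_names, weights, y):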
|
566 |
# Check for potential errors before they happen
|
567 |
assert len(unary_operators) + len(binary_operators) > 0
|
568 |
assert len(X.shape) == 2
|
@@ -575,7 +575,7 @@ def check_assertions(X, binary_operators, unary_operators, use_custom_variable_n
|
|
575 |
assert len(variable_names) == X.shape[1]
|
576 |
|
577 |
|
578 |
-
def raise_depreciation_errors(limitPowComplexity, threads):
|
579 |
if threads is not None:
|
580 |
raise ValueError("The threads kwarg is deprecated. Use procs.")
|
581 |
if limitPowComplexity:
|
|
|
192 |
(as strings).
|
193 |
|
194 |
"""
|
195 |
+
_raise_depreciation_errors(limitPowComplexity, threads)
|
196 |
|
197 |
if isinstance(X, pd.DataFrame):
|
198 |
variable_names = list(X.columns)
|
|
|
210 |
if len(X) > 10000 and not batching:
|
211 |
warnings.warn("Note: you are running with more than 10,000 datapoints. You should consider turning on batching (https://pysr.readthedocs.io/en/latest/docs/options/#batching). You should also reconsider if you need that many datapoints. Unless you have a large amount of noise (in which case you should smooth your dataset first), generally < 10,000 datapoints is enough to find a functional form with symbolic regression. More datapoints will lower the search speed.")
|
212 |
|
213 |
+
X, variable_names = _handle_feature_selection(
|
214 |
X, select_k_features,
|
215 |
use_custom_variable_names, variable_names, y
|
216 |
)
|
|
|
516 |
return X, y
|
517 |
|
518 |
|
519 |
+
def _handle_feature_selection(X, select_k_features, use_custom_variable_names, variable_names, y):
|
520 |
if select_k_features is not None:
|
521 |
selection = run_feature_selection(X, y, select_k_features)
|
522 |
print(f"Using features {selection}")
|
|
|
562 |
weights_filename=weights_filename, y_filename=y_filename)
|
563 |
|
564 |
|
565 |
+
def _check_assertions(X, binary_operators, unary_operators, use_custom_variable_names, variable_names, weights, y):
|
566 |
# Check for potential errors before they happen
|
567 |
assert len(unary_operators) + len(binary_operators) > 0
|
568 |
assert len(X.shape) == 2
|
|
|
575 |
assert len(variable_names) == X.shape[1]
|
576 |
|
577 |
|
578 |
+
def _raise_depreciation_errors(limitPowComplexity, threads):
|
579 |
if threads is not None:
|
580 |
raise ValueError("The threads kwarg is deprecated. Use procs.")
|
581 |
if limitPowComplexity:
|