Spaces:
Running
Running
MilesCranmer
committed on
Merge pull request #564 from MilesCranmer/create-pull-request/patch
Browse files
- .deepsource.toml +0 -16
- README.md +1 -1
- docs/tuning.md +1 -1
- pysr/_cli/main.py +17 -4
- pysr/julia_extensions.py +32 -0
- pysr/juliapkg.json +1 -9
- pysr/param_groupings.yml +1 -0
- pysr/sr.py +33 -11
- pysr/test/test.py +18 -13
- pysr/test/test_cli.py +2 -1
- pysr/test/test_nb.ipynb +1 -28
.deepsource.toml
DELETED
@@ -1,16 +0,0 @@
|
|
1 |
-
version = 1
|
2 |
-
|
3 |
-
test_patterns = ["test/*.py"]
|
4 |
-
|
5 |
-
exclude_patterns = ["Project.toml"]
|
6 |
-
|
7 |
-
[[analyzers]]
|
8 |
-
name = "python"
|
9 |
-
enabled = true
|
10 |
-
|
11 |
-
[analyzers.meta]
|
12 |
-
runtime_version = "3.x.x"
|
13 |
-
|
14 |
-
[[transformers]]
|
15 |
-
name = "black"
|
16 |
-
enabled = true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
README.md
CHANGED
@@ -287,7 +287,7 @@ model = PySRRegressor(
|
|
287 |
# ^ Higher precision calculations.
|
288 |
warm_start=True,
|
289 |
# ^ Start from where left off.
|
290 |
-
|
291 |
# ^ Faster evaluation (experimental)
|
292 |
julia_project=None,
|
293 |
# ^ Can set to the path of a folder containing the
|
|
|
287 |
# ^ Higher precision calculations.
|
288 |
warm_start=True,
|
289 |
# ^ Start from where left off.
|
290 |
+
bumper=True,
|
291 |
# ^ Faster evaluation (experimental)
|
292 |
julia_project=None,
|
293 |
# ^ Can set to the path of a folder containing the
|
docs/tuning.md
CHANGED
@@ -20,7 +20,7 @@ I run from IPython (Jupyter Notebooks don't work as well[^1]) on the head node o
|
|
20 |
8. I typically don't use `maxdepth`, but if I do, I set it strictly, while also leaving a bit of room for exploration. e.g., if you want a final equation limited to a depth of `5`, you might set this to `6` or `7`, so that it has a bit of room to explore.
|
21 |
9. Set `parsimony` equal to about the minimum loss you would expect, divided by 5-10. e.g., if you expect the final equation to have a loss of `0.001`, you might set `parsimony=0.0001`.
|
22 |
10. Set `weight_optimize` to some larger value, maybe `0.001`. This is very important if `ncycles_per_iteration` is large, so that optimization happens more frequently.
|
23 |
-
11. Set `
|
24 |
12. For final runs, after I have tuned everything, I typically set `niterations` to some very large value, and just let it run for a week until my job finishes (genetic algorithms tend not to converge, they can look like they settle down, but then find a new family of expression, and explore a new space). If I am satisfied with the current equations (which are visible either in the terminal or in the saved csv file), I quit the job early.
|
25 |
|
26 |
Since I am running in IPython, I can just hit `q` and then `<enter>` to stop the job, tweak the hyperparameters, and then start the search again.
|
|
|
20 |
8. I typically don't use `maxdepth`, but if I do, I set it strictly, while also leaving a bit of room for exploration. e.g., if you want a final equation limited to a depth of `5`, you might set this to `6` or `7`, so that it has a bit of room to explore.
|
21 |
9. Set `parsimony` equal to about the minimum loss you would expect, divided by 5-10. e.g., if you expect the final equation to have a loss of `0.001`, you might set `parsimony=0.0001`.
|
22 |
10. Set `weight_optimize` to some larger value, maybe `0.001`. This is very important if `ncycles_per_iteration` is large, so that optimization happens more frequently.
|
23 |
+
11. Set `bumper` to `True`. This turns on bump allocation but is experimental. It should give you a nice 20% speedup.
|
24 |
12. For final runs, after I have tuned everything, I typically set `niterations` to some very large value, and just let it run for a week until my job finishes (genetic algorithms tend not to converge, they can look like they settle down, but then find a new family of expression, and explore a new space). If I am satisfied with the current equations (which are visible either in the terminal or in the saved csv file), I quit the job early.
|
25 |
|
26 |
Since I am running in IPython, I can just hit `q` and then `<enter>` to stop the job, tweak the hyperparameters, and then start the search again.
|
pysr/_cli/main.py
CHANGED
@@ -1,3 +1,4 @@
|
|
|
|
1 |
import sys
|
2 |
import unittest
|
3 |
import warnings
|
@@ -52,7 +53,14 @@ TEST_OPTIONS = {"main", "jax", "torch", "cli", "dev", "startup"}
|
|
52 |
|
53 |
@pysr.command("test")
|
54 |
@click.argument("tests", nargs=1)
|
55 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
56 |
"""Run parts of the PySR test suite.
|
57 |
|
58 |
Choose from main, jax, torch, cli, dev, and startup. You can give multiple tests, separated by commas.
|
@@ -78,11 +86,16 @@ def _tests(tests):
|
|
78 |
loader = unittest.TestLoader()
|
79 |
suite = unittest.TestSuite()
|
80 |
for test_case in test_cases:
|
81 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
82 |
runner = unittest.TextTestRunner()
|
83 |
results = runner.run(suite)
|
84 |
-
# Normally unittest would run this, but here we have
|
85 |
-
# to do it manually to get the exit code.
|
86 |
|
87 |
if not results.wasSuccessful():
|
88 |
sys.exit(1)
|
|
|
1 |
+
import fnmatch
|
2 |
import sys
|
3 |
import unittest
|
4 |
import warnings
|
|
|
53 |
|
54 |
@pysr.command("test")
|
55 |
@click.argument("tests", nargs=1)
|
56 |
+
@click.option(
|
57 |
+
"-k",
|
58 |
+
"expressions",
|
59 |
+
multiple=True,
|
60 |
+
type=str,
|
61 |
+
help="Filter expressions to select specific tests.",
|
62 |
+
)
|
63 |
+
def _tests(tests, expressions):
|
64 |
"""Run parts of the PySR test suite.
|
65 |
|
66 |
Choose from main, jax, torch, cli, dev, and startup. You can give multiple tests, separated by commas.
|
|
|
86 |
loader = unittest.TestLoader()
|
87 |
suite = unittest.TestSuite()
|
88 |
for test_case in test_cases:
|
89 |
+
loaded_tests = loader.loadTestsFromTestCase(test_case)
|
90 |
+
for test in loaded_tests:
|
91 |
+
if len(expressions) == 0 or any(
|
92 |
+
fnmatch.fnmatch(test.id(), "*" + expression + "*")
|
93 |
+
for expression in expressions
|
94 |
+
):
|
95 |
+
suite.addTest(test)
|
96 |
+
|
97 |
runner = unittest.TextTestRunner()
|
98 |
results = runner.run(suite)
|
|
|
|
|
99 |
|
100 |
if not results.wasSuccessful():
|
101 |
sys.exit(1)
|
pysr/julia_extensions.py
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""This file installs and loads extensions for SymbolicRegression."""
|
2 |
+
|
3 |
+
from .julia_import import jl
|
4 |
+
|
5 |
+
|
6 |
+
def load_required_packages(
|
7 |
+
*, turbo=False, bumper=False, enable_autodiff=False, cluster_manager=None
|
8 |
+
):
|
9 |
+
if turbo:
|
10 |
+
load_package("LoopVectorization", "bdcacae8-1622-11e9-2a5c-532679323890")
|
11 |
+
if bumper:
|
12 |
+
load_package("Bumper", "8ce10254-0962-460f-a3d8-1f77fea1446e")
|
13 |
+
if enable_autodiff:
|
14 |
+
load_package("Zygote", "e88e6eb3-aa80-5325-afca-941959d7151f")
|
15 |
+
if cluster_manager is not None:
|
16 |
+
load_package("ClusterManagers", "34f1f09b-3a8b-5176-ab39-66d58a4d544e")
|
17 |
+
|
18 |
+
|
19 |
+
def load_package(package_name, uuid):
|
20 |
+
jl.seval(
|
21 |
+
f"""
|
22 |
+
try
|
23 |
+
using {package_name}
|
24 |
+
catch e
|
25 |
+
isa(e, ArgumentError) || throw(e)
|
26 |
+
using Pkg: Pkg
|
27 |
+
Pkg.add(name="{package_name}", uuid="{uuid}")
|
28 |
+
using {package_name}
|
29 |
+
end
|
30 |
+
"""
|
31 |
+
)
|
32 |
+
return None
|
pysr/juliapkg.json
CHANGED
@@ -3,19 +3,11 @@
|
|
3 |
"packages": {
|
4 |
"SymbolicRegression": {
|
5 |
"uuid": "8254be44-1295-4e6a-a16d-46603ac705cb",
|
6 |
-
"version": "=0.
|
7 |
-
},
|
8 |
-
"ClusterManagers": {
|
9 |
-
"uuid": "34f1f09b-3a8b-5176-ab39-66d58a4d544e",
|
10 |
-
"version": "0.4"
|
11 |
},
|
12 |
"Serialization": {
|
13 |
"uuid": "9e88b42a-f829-5b0c-bbe9-9e923198166b",
|
14 |
"version": "1"
|
15 |
-
},
|
16 |
-
"Zygote": {
|
17 |
-
"uuid": "e88e6eb3-aa80-5325-afca-941959d7151f",
|
18 |
-
"version": "0.6"
|
19 |
}
|
20 |
}
|
21 |
}
|
|
|
3 |
"packages": {
|
4 |
"SymbolicRegression": {
|
5 |
"uuid": "8254be44-1295-4e6a-a16d-46603ac705cb",
|
6 |
+
"version": "=0.24.0"
|
|
|
|
|
|
|
|
|
7 |
},
|
8 |
"Serialization": {
|
9 |
"uuid": "9e88b42a-f829-5b0c-bbe9-9e923198166b",
|
10 |
"version": "1"
|
|
|
|
|
|
|
|
|
11 |
}
|
12 |
}
|
13 |
}
|
pysr/param_groupings.yml
CHANGED
@@ -74,6 +74,7 @@
|
|
74 |
- precision
|
75 |
- fast_cycle
|
76 |
- turbo
|
|
|
77 |
- enable_autodiff
|
78 |
- Determinism:
|
79 |
- random_state
|
|
|
74 |
- precision
|
75 |
- fast_cycle
|
76 |
- turbo
|
77 |
+
- bumper
|
78 |
- enable_autodiff
|
79 |
- Determinism:
|
80 |
- random_state
|
pysr/sr.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
"""Define the PySRRegressor scikit-learn interface."""
|
|
|
2 |
import copy
|
3 |
import os
|
4 |
import pickle as pkl
|
@@ -32,6 +33,7 @@ from .export_numpy import sympy2numpy
|
|
32 |
from .export_sympy import assert_valid_sympy_symbol, create_sympy_symbols, pysr2sympy
|
33 |
from .export_torch import sympy2torch
|
34 |
from .feature_selection import run_feature_selection
|
|
|
35 |
from .julia_helpers import (
|
36 |
PythonCall,
|
37 |
_escape_filename,
|
@@ -482,6 +484,10 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
482 |
search evaluation. Certain operators may not be supported.
|
483 |
Does not support 16-bit precision floats.
|
484 |
Default is `False`.
|
|
|
|
|
|
|
|
|
485 |
precision : int
|
486 |
What precision to use for the data. By default this is `32`
|
487 |
(float32), but you can select `64` or `16` as well, giving
|
@@ -697,7 +703,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
697 |
weight_do_nothing: float = 0.21,
|
698 |
weight_mutate_constant: float = 0.048,
|
699 |
weight_mutate_operator: float = 0.47,
|
700 |
-
weight_swap_operands: float = 0.
|
701 |
weight_randomize: float = 0.00023,
|
702 |
weight_simplify: float = 0.0020,
|
703 |
weight_optimize: float = 0.0,
|
@@ -725,6 +731,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
725 |
batch_size: int = 50,
|
726 |
fast_cycle: bool = False,
|
727 |
turbo: bool = False,
|
|
|
728 |
precision: int = 32,
|
729 |
enable_autodiff: bool = False,
|
730 |
random_state=None,
|
@@ -820,6 +827,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
820 |
self.batch_size = batch_size
|
821 |
self.fast_cycle = fast_cycle
|
822 |
self.turbo = turbo
|
|
|
823 |
self.precision = precision
|
824 |
self.enable_autodiff = enable_autodiff
|
825 |
self.random_state = random_state
|
@@ -1263,9 +1271,9 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
1263 |
f"PySR currently only supports the following optimizer algorithms: {VALID_OPTIMIZER_ALGORITHMS}"
|
1264 |
)
|
1265 |
|
|
|
1266 |
# 'Mutable' parameter validation
|
1267 |
-
|
1268 |
-
# Params and their default values, if None is given:
|
1269 |
default_param_mapping = {
|
1270 |
"binary_operators": "+ * - /".split(" "),
|
1271 |
"unary_operators": [],
|
@@ -1274,7 +1282,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
1274 |
"multithreading": self.procs != 0 and self.cluster_manager is None,
|
1275 |
"batch_size": 1,
|
1276 |
"update_verbosity": int(self.verbosity),
|
1277 |
-
"progress":
|
1278 |
}
|
1279 |
packed_modified_params = {}
|
1280 |
for parameter, default_value in default_param_mapping.items():
|
@@ -1293,7 +1301,11 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
1293 |
"`batch_size` has been increased to equal one."
|
1294 |
)
|
1295 |
parameter_value = 1
|
1296 |
-
elif
|
|
|
|
|
|
|
|
|
1297 |
warnings.warn(
|
1298 |
"Note: it looks like you are running in Jupyter. "
|
1299 |
"The progress bar will be turned off."
|
@@ -1605,6 +1617,13 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
1605 |
else "nothing"
|
1606 |
)
|
1607 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1608 |
mutation_weights = SymbolicRegression.MutationWeights(
|
1609 |
mutate_constant=self.weight_mutate_constant,
|
1610 |
mutate_operator=self.weight_mutate_operator,
|
@@ -1646,15 +1665,16 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
1646 |
maxdepth=maxdepth,
|
1647 |
fast_cycle=self.fast_cycle,
|
1648 |
turbo=self.turbo,
|
|
|
1649 |
enable_autodiff=self.enable_autodiff,
|
1650 |
migration=self.migration,
|
1651 |
hof_migration=self.hof_migration,
|
1652 |
fraction_replaced_hof=self.fraction_replaced_hof,
|
1653 |
should_simplify=self.should_simplify,
|
1654 |
should_optimize_constants=self.should_optimize_constants,
|
1655 |
-
warmup_maxsize_by=
|
1656 |
-
|
1657 |
-
|
1658 |
use_frequency=self.use_frequency,
|
1659 |
use_frequency_in_tournament=self.use_frequency_in_tournament,
|
1660 |
adaptive_parsimony_scaling=self.adaptive_parsimony_scaling,
|
@@ -1736,9 +1756,11 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
1736 |
),
|
1737 |
y_variable_names=jl_y_variable_names,
|
1738 |
X_units=jl_array(self.X_units_),
|
1739 |
-
y_units=
|
1740 |
-
|
1741 |
-
|
|
|
|
|
1742 |
options=options,
|
1743 |
numprocs=cprocs,
|
1744 |
parallelism=parallelism,
|
|
|
1 |
"""Define the PySRRegressor scikit-learn interface."""
|
2 |
+
|
3 |
import copy
|
4 |
import os
|
5 |
import pickle as pkl
|
|
|
33 |
from .export_sympy import assert_valid_sympy_symbol, create_sympy_symbols, pysr2sympy
|
34 |
from .export_torch import sympy2torch
|
35 |
from .feature_selection import run_feature_selection
|
36 |
+
from .julia_extensions import load_required_packages
|
37 |
from .julia_helpers import (
|
38 |
PythonCall,
|
39 |
_escape_filename,
|
|
|
484 |
search evaluation. Certain operators may not be supported.
|
485 |
Does not support 16-bit precision floats.
|
486 |
Default is `False`.
|
487 |
+
bumper: bool
|
488 |
+
(Experimental) Whether to use Bumper.jl to speed up the search
|
489 |
+
evaluation. Does not support 16-bit precision floats.
|
490 |
+
Default is `False`.
|
491 |
precision : int
|
492 |
What precision to use for the data. By default this is `32`
|
493 |
(float32), but you can select `64` or `16` as well, giving
|
|
|
703 |
weight_do_nothing: float = 0.21,
|
704 |
weight_mutate_constant: float = 0.048,
|
705 |
weight_mutate_operator: float = 0.47,
|
706 |
+
weight_swap_operands: float = 0.1,
|
707 |
weight_randomize: float = 0.00023,
|
708 |
weight_simplify: float = 0.0020,
|
709 |
weight_optimize: float = 0.0,
|
|
|
731 |
batch_size: int = 50,
|
732 |
fast_cycle: bool = False,
|
733 |
turbo: bool = False,
|
734 |
+
bumper: bool = False,
|
735 |
precision: int = 32,
|
736 |
enable_autodiff: bool = False,
|
737 |
random_state=None,
|
|
|
827 |
self.batch_size = batch_size
|
828 |
self.fast_cycle = fast_cycle
|
829 |
self.turbo = turbo
|
830 |
+
self.bumper = bumper
|
831 |
self.precision = precision
|
832 |
self.enable_autodiff = enable_autodiff
|
833 |
self.random_state = random_state
|
|
|
1271 |
f"PySR currently only supports the following optimizer algorithms: {VALID_OPTIMIZER_ALGORITHMS}"
|
1272 |
)
|
1273 |
|
1274 |
+
progress = self.progress
|
1275 |
# 'Mutable' parameter validation
|
1276 |
+
# (Params and their default values, if None is given:)
|
|
|
1277 |
default_param_mapping = {
|
1278 |
"binary_operators": "+ * - /".split(" "),
|
1279 |
"unary_operators": [],
|
|
|
1282 |
"multithreading": self.procs != 0 and self.cluster_manager is None,
|
1283 |
"batch_size": 1,
|
1284 |
"update_verbosity": int(self.verbosity),
|
1285 |
+
"progress": progress,
|
1286 |
}
|
1287 |
packed_modified_params = {}
|
1288 |
for parameter, default_value in default_param_mapping.items():
|
|
|
1301 |
"`batch_size` has been increased to equal one."
|
1302 |
)
|
1303 |
parameter_value = 1
|
1304 |
+
elif (
|
1305 |
+
parameter == "progress"
|
1306 |
+
and parameter_value
|
1307 |
+
and "buffer" not in sys.stdout.__dir__()
|
1308 |
+
):
|
1309 |
warnings.warn(
|
1310 |
"Note: it looks like you are running in Jupyter. "
|
1311 |
"The progress bar will be turned off."
|
|
|
1617 |
else "nothing"
|
1618 |
)
|
1619 |
|
1620 |
+
load_required_packages(
|
1621 |
+
turbo=self.turbo,
|
1622 |
+
bumper=self.bumper,
|
1623 |
+
enable_autodiff=self.enable_autodiff,
|
1624 |
+
cluster_manager=cluster_manager,
|
1625 |
+
)
|
1626 |
+
|
1627 |
mutation_weights = SymbolicRegression.MutationWeights(
|
1628 |
mutate_constant=self.weight_mutate_constant,
|
1629 |
mutate_operator=self.weight_mutate_operator,
|
|
|
1665 |
maxdepth=maxdepth,
|
1666 |
fast_cycle=self.fast_cycle,
|
1667 |
turbo=self.turbo,
|
1668 |
+
bumper=self.bumper,
|
1669 |
enable_autodiff=self.enable_autodiff,
|
1670 |
migration=self.migration,
|
1671 |
hof_migration=self.hof_migration,
|
1672 |
fraction_replaced_hof=self.fraction_replaced_hof,
|
1673 |
should_simplify=self.should_simplify,
|
1674 |
should_optimize_constants=self.should_optimize_constants,
|
1675 |
+
warmup_maxsize_by=(
|
1676 |
+
0.0 if self.warmup_maxsize_by is None else self.warmup_maxsize_by
|
1677 |
+
),
|
1678 |
use_frequency=self.use_frequency,
|
1679 |
use_frequency_in_tournament=self.use_frequency_in_tournament,
|
1680 |
adaptive_parsimony_scaling=self.adaptive_parsimony_scaling,
|
|
|
1756 |
),
|
1757 |
y_variable_names=jl_y_variable_names,
|
1758 |
X_units=jl_array(self.X_units_),
|
1759 |
+
y_units=(
|
1760 |
+
jl_array(self.y_units_)
|
1761 |
+
if isinstance(self.y_units_, list)
|
1762 |
+
else self.y_units_
|
1763 |
+
),
|
1764 |
options=options,
|
1765 |
numprocs=cprocs,
|
1766 |
parallelism=parallelism,
|
pysr/test/test.py
CHANGED
@@ -58,16 +58,20 @@ class TestPipeline(unittest.TestCase):
|
|
58 |
model.fit(self.X, y, variable_names=["c1", "c2", "c3", "c4", "c5"])
|
59 |
self.assertIn("c1", model.equations_.iloc[-1]["equation"])
|
60 |
|
61 |
-
def
|
62 |
y = self.X[:, 0]
|
63 |
weights = np.ones_like(y)
|
64 |
model = PySRRegressor(
|
65 |
**self.default_test_kwargs,
|
66 |
early_stop_condition="stop_if(loss, complexity) = loss < 1e-4 && complexity == 1",
|
|
|
67 |
)
|
68 |
model.fit(self.X, y, weights=weights)
|
69 |
print(model.equations_)
|
70 |
self.assertLessEqual(model.get_best()["loss"], 1e-4)
|
|
|
|
|
|
|
71 |
|
72 |
def test_multiprocessing_turbo_custom_objective(self):
|
73 |
rstate = np.random.RandomState(0)
|
@@ -97,7 +101,9 @@ class TestPipeline(unittest.TestCase):
|
|
97 |
self.assertGreaterEqual(best_loss, 0.0)
|
98 |
|
99 |
# Test options stored:
|
100 |
-
self.assertEqual(
|
|
|
|
|
101 |
|
102 |
def test_multiline_seval(self):
|
103 |
# The user should be able to run multiple things in a single seval call:
|
@@ -128,7 +134,9 @@ class TestPipeline(unittest.TestCase):
|
|
128 |
self.assertTrue(jl.typeof(test_state[1]).parameters[1] == jl.Float64)
|
129 |
|
130 |
# Test options stored:
|
131 |
-
self.assertEqual(
|
|
|
|
|
132 |
|
133 |
def test_multioutput_custom_operator_quiet_custom_complexity(self):
|
134 |
y = self.X[:, [0, 1]] ** 2
|
@@ -163,10 +171,6 @@ class TestPipeline(unittest.TestCase):
|
|
163 |
self.assertLessEqual(mse1, 1e-4)
|
164 |
self.assertLessEqual(mse2, 1e-4)
|
165 |
|
166 |
-
bad_y = model.predict(self.X, index=[0, 0])
|
167 |
-
bad_mse = np.average((bad_y - y) ** 2)
|
168 |
-
self.assertGreater(bad_mse, 1e-4)
|
169 |
-
|
170 |
def test_multioutput_weighted_with_callable_temp_equation(self):
|
171 |
X = self.X.copy()
|
172 |
y = X[:, [0, 1]] ** 2
|
@@ -1028,9 +1032,8 @@ class TestDimensionalConstraints(unittest.TestCase):
|
|
1028 |
for i in range(2):
|
1029 |
self.assertGreater(model.get_best()[i]["complexity"], 2)
|
1030 |
self.assertLess(model.get_best()[i]["loss"], 1e-6)
|
1031 |
-
|
1032 |
-
|
1033 |
-
)
|
1034 |
|
1035 |
def test_unit_checks(self):
|
1036 |
"""This just checks the number of units passed"""
|
@@ -1107,8 +1110,10 @@ class TestDimensionalConstraints(unittest.TestCase):
|
|
1107 |
self.assertNotIn("x1", best["equation"])
|
1108 |
self.assertIn("x2", best["equation"])
|
1109 |
self.assertEqual(best["complexity"], 3)
|
1110 |
-
self.
|
1111 |
-
|
|
|
|
|
1112 |
|
1113 |
# With pkl file:
|
1114 |
pkl_file = str(temp_dir / "equation_file.pkl")
|
@@ -1127,8 +1132,8 @@ class TestDimensionalConstraints(unittest.TestCase):
|
|
1127 |
|
1128 |
# Try warm start, but with no units provided (should
|
1129 |
# be a different dataset, and thus different result):
|
1130 |
-
model.fit(X, y)
|
1131 |
model.early_stop_condition = "(l, c) -> l < 1e-6 && c == 1"
|
|
|
1132 |
self.assertEqual(model.equations_.iloc[0].complexity, 1)
|
1133 |
self.assertLess(model.equations_.iloc[0].loss, 1e-6)
|
1134 |
|
|
|
58 |
model.fit(self.X, y, variable_names=["c1", "c2", "c3", "c4", "c5"])
|
59 |
self.assertIn("c1", model.equations_.iloc[-1]["equation"])
|
60 |
|
61 |
+
def test_linear_relation_weighted_bumper(self):
|
62 |
y = self.X[:, 0]
|
63 |
weights = np.ones_like(y)
|
64 |
model = PySRRegressor(
|
65 |
**self.default_test_kwargs,
|
66 |
early_stop_condition="stop_if(loss, complexity) = loss < 1e-4 && complexity == 1",
|
67 |
+
bumper=True,
|
68 |
)
|
69 |
model.fit(self.X, y, weights=weights)
|
70 |
print(model.equations_)
|
71 |
self.assertLessEqual(model.get_best()["loss"], 1e-4)
|
72 |
+
self.assertEqual(
|
73 |
+
jl.seval("((::Val{x}) where x) -> x")(model.julia_options_.bumper), True
|
74 |
+
)
|
75 |
|
76 |
def test_multiprocessing_turbo_custom_objective(self):
|
77 |
rstate = np.random.RandomState(0)
|
|
|
101 |
self.assertGreaterEqual(best_loss, 0.0)
|
102 |
|
103 |
# Test options stored:
|
104 |
+
self.assertEqual(
|
105 |
+
jl.seval("((::Val{x}) where x) -> x")(model.julia_options_.turbo), True
|
106 |
+
)
|
107 |
|
108 |
def test_multiline_seval(self):
|
109 |
# The user should be able to run multiple things in a single seval call:
|
|
|
134 |
self.assertTrue(jl.typeof(test_state[1]).parameters[1] == jl.Float64)
|
135 |
|
136 |
# Test options stored:
|
137 |
+
self.assertEqual(
|
138 |
+
jl.seval("((::Val{x}) where x) -> x")(model.julia_options_.turbo), False
|
139 |
+
)
|
140 |
|
141 |
def test_multioutput_custom_operator_quiet_custom_complexity(self):
|
142 |
y = self.X[:, [0, 1]] ** 2
|
|
|
171 |
self.assertLessEqual(mse1, 1e-4)
|
172 |
self.assertLessEqual(mse2, 1e-4)
|
173 |
|
|
|
|
|
|
|
|
|
174 |
def test_multioutput_weighted_with_callable_temp_equation(self):
|
175 |
X = self.X.copy()
|
176 |
y = X[:, [0, 1]] ** 2
|
|
|
1032 |
for i in range(2):
|
1033 |
self.assertGreater(model.get_best()[i]["complexity"], 2)
|
1034 |
self.assertLess(model.get_best()[i]["loss"], 1e-6)
|
1035 |
+
simple_eqs = model.equations_[i].query("complexity <= 2")
|
1036 |
+
self.assertTrue(len(simple_eqs) == 0 or simple_eqs.loss.min() > 1e-6)
|
|
|
1037 |
|
1038 |
def test_unit_checks(self):
|
1039 |
"""This just checks the number of units passed"""
|
|
|
1110 |
self.assertNotIn("x1", best["equation"])
|
1111 |
self.assertIn("x2", best["equation"])
|
1112 |
self.assertEqual(best["complexity"], 3)
|
1113 |
+
self.assertTrue(
|
1114 |
+
model.equations_.iloc[0].complexity > 1
|
1115 |
+
or model.equations_.iloc[0].loss > 1e-6
|
1116 |
+
)
|
1117 |
|
1118 |
# With pkl file:
|
1119 |
pkl_file = str(temp_dir / "equation_file.pkl")
|
|
|
1132 |
|
1133 |
# Try warm start, but with no units provided (should
|
1134 |
# be a different dataset, and thus different result):
|
|
|
1135 |
model.early_stop_condition = "(l, c) -> l < 1e-6 && c == 1"
|
1136 |
+
model.fit(X, y)
|
1137 |
self.assertEqual(model.equations_.iloc[0].complexity, 1)
|
1138 |
self.assertLess(model.equations_.iloc[0].loss, 1e-6)
|
1139 |
|
pysr/test/test_cli.py
CHANGED
@@ -61,7 +61,8 @@ def get_runtests():
|
|
61 |
tests, separated by commas.
|
62 |
|
63 |
Options:
|
64 |
-
|
|
|
65 |
"""
|
66 |
)
|
67 |
result = self.cli_runner.invoke(pysr, ["test", "--help"])
|
|
|
61 |
tests, separated by commas.
|
62 |
|
63 |
Options:
|
64 |
+
-k TEXT Filter expressions to select specific tests.
|
65 |
+
--help Show this message and exit.
|
66 |
"""
|
67 |
)
|
68 |
result = self.cli_runner.invoke(pysr, ["test", "--help"])
|
pysr/test/test_nb.ipynb
CHANGED
@@ -11,18 +11,6 @@
|
|
11 |
"text": [
|
12 |
"Detected Jupyter notebook. Loading juliacall extension. Set `PYSR_AUTOLOAD_EXTENSIONS=no` to disable.\n"
|
13 |
]
|
14 |
-
},
|
15 |
-
{
|
16 |
-
"name": "stderr",
|
17 |
-
"output_type": "stream",
|
18 |
-
"text": [
|
19 |
-
"Precompiling SymbolicRegression\n",
|
20 |
-
"\u001b[32m ✓ \u001b[39mSymbolicRegression\n",
|
21 |
-
" 1 dependency successfully precompiled in 26 seconds. 106 already precompiled.\n",
|
22 |
-
"Precompiling SymbolicRegressionJSON3Ext\n",
|
23 |
-
"\u001b[32m ✓ \u001b[39m\u001b[90mSymbolicRegression → SymbolicRegressionJSON3Ext\u001b[39m\n",
|
24 |
-
" 1 dependency successfully precompiled in 2 seconds. 110 already precompiled.\n"
|
25 |
-
]
|
26 |
}
|
27 |
],
|
28 |
"source": [
|
@@ -143,14 +131,6 @@
|
|
143 |
"execution_count": 7,
|
144 |
"metadata": {},
|
145 |
"outputs": [
|
146 |
-
{
|
147 |
-
"name": "stderr",
|
148 |
-
"output_type": "stream",
|
149 |
-
"text": [
|
150 |
-
"/Users/mcranmer/PermaDocuments/SymbolicRegressionMonorepo/.venv/lib/python3.12/site-packages/pysr/sr.py:1297: UserWarning: Note: it looks like you are running in Jupyter. The progress bar will be turned off.\n",
|
151 |
-
" warnings.warn(\n"
|
152 |
-
]
|
153 |
-
},
|
154 |
{
|
155 |
"data": {
|
156 |
"text/plain": [
|
@@ -166,13 +146,6 @@
|
|
166 |
"model.fit(X, y)\n",
|
167 |
"type(model.equations_)"
|
168 |
]
|
169 |
-
},
|
170 |
-
{
|
171 |
-
"cell_type": "code",
|
172 |
-
"execution_count": null,
|
173 |
-
"metadata": {},
|
174 |
-
"outputs": [],
|
175 |
-
"source": []
|
176 |
}
|
177 |
],
|
178 |
"metadata": {
|
@@ -191,7 +164,7 @@
|
|
191 |
"name": "python",
|
192 |
"nbconvert_exporter": "python",
|
193 |
"pygments_lexer": "ipython3",
|
194 |
-
"version": "3.
|
195 |
}
|
196 |
},
|
197 |
"nbformat": 4,
|
|
|
11 |
"text": [
|
12 |
"Detected Jupyter notebook. Loading juliacall extension. Set `PYSR_AUTOLOAD_EXTENSIONS=no` to disable.\n"
|
13 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
}
|
15 |
],
|
16 |
"source": [
|
|
|
131 |
"execution_count": 7,
|
132 |
"metadata": {},
|
133 |
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
134 |
{
|
135 |
"data": {
|
136 |
"text/plain": [
|
|
|
146 |
"model.fit(X, y)\n",
|
147 |
"type(model.equations_)"
|
148 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
149 |
}
|
150 |
],
|
151 |
"metadata": {
|
|
|
164 |
"name": "python",
|
165 |
"nbconvert_exporter": "python",
|
166 |
"pygments_lexer": "ipython3",
|
167 |
+
"version": "3.11.2"
|
168 |
}
|
169 |
},
|
170 |
"nbformat": 4,
|