Spaces:
Running
Running
MilesCranmer
commited on
Merge pull request #535 from MilesCranmer/pythoncall-try-3
Browse files(BREAKING) Rewrite Julia interface with PyJulia -> JuliaCall; other changes
This view is limited to 50 files because it contains too many changes.
See raw diff
- .coveragerc +4 -0
- .github/workflows/CI.yml +40 -23
- .github/workflows/CI_Windows.yml +5 -8
- .github/workflows/CI_conda_forge.yml +3 -1
- .github/workflows/CI_docker.yml +1 -1
- .github/workflows/CI_docker_large_nightly.yml +1 -1
- .github/workflows/CI_large_nightly.yml +7 -9
- .github/workflows/CI_mac.yml +6 -9
- .pre-commit-config.yaml +2 -0
- CONTRIBUTORS.md +1 -1
- Dockerfile +6 -5
- README.md +12 -38
- TODO.md +0 -142
- datasets/FeynmanEquations.csv +0 -101
- docs/backend.md +61 -15
- docs/examples.md +10 -12
- docs/options.md +9 -9
- docs/tuning.md +2 -2
- environment.yml +8 -9
- example.py +1 -1
- examples/pysr_demo.ipynb +60 -109
- pyproject.toml +29 -0
- pysr/.gitignore +1 -0
- pysr/__init__.py +9 -12
- pysr/__main__.py +1 -1
- pysr/_cli/main.py +57 -7
- pysr/deprecated.py +46 -35
- pysr/feynman_problems.py +0 -176
- pysr/julia_helpers.py +27 -326
- pysr/julia_import.py +76 -0
- pysr/juliapkg.json +21 -0
- pysr/param_groupings.yml +3 -5
- pysr/sklearn_monkeypatch.py +1 -1
- pysr/sr.py +120 -116
- pysr/test/__init__.py +11 -3
- pysr/test/__main__.py +1 -31
- pysr/test/generate_dev_juliapkg.py +17 -0
- pysr/test/incremental_install_simulator.dockerfile +0 -52
- pysr/test/nb_sanitize.cfg +3 -0
- pysr/test/params.py +8 -0
- pysr/test/test.py +102 -40
- pysr/test/test_cli.py +79 -55
- pysr/test/test_dev.py +59 -0
- pysr/test/test_dev_pysr.dockerfile +57 -0
- pysr/test/test_env.py +0 -58
- pysr/test/test_jax.py +6 -2
- pysr/test/test_nb.ipynb +536 -0
- pysr/test/test_startup.py +164 -0
- pysr/test/test_torch.py +21 -35
- pysr/version.py +0 -2
.coveragerc
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[run]
|
2 |
+
omit =
|
3 |
+
*/test/*
|
4 |
+
source = pysr
|
.github/workflows/CI.yml
CHANGED
@@ -24,6 +24,8 @@ jobs:
|
|
24 |
test:
|
25 |
runs-on: ${{ matrix.os }}
|
26 |
timeout-minutes: 60
|
|
|
|
|
27 |
defaults:
|
28 |
run:
|
29 |
shell: bash
|
@@ -38,6 +40,10 @@ jobs:
|
|
38 |
python-version: '3.7'
|
39 |
os: ubuntu-latest
|
40 |
test-id: include
|
|
|
|
|
|
|
|
|
41 |
|
42 |
steps:
|
43 |
- uses: actions/checkout@v4
|
@@ -58,29 +64,29 @@ jobs:
|
|
58 |
- name: "Install PySR"
|
59 |
run: |
|
60 |
python -m pip install --upgrade pip
|
61 |
-
pip install
|
62 |
-
python
|
63 |
-
|
64 |
-
|
65 |
-
run:
|
|
|
|
|
|
|
|
|
66 |
- name: "Run tests"
|
67 |
-
run:
|
68 |
-
coverage run --source=pysr --omit='*/test/*,*/feynman_problems.py' -m pysr.test main
|
69 |
-
coverage run --append --source=pysr --omit='*/test/*,*/feynman_problems.py' -m pysr.test cli
|
70 |
- name: "Install JAX"
|
71 |
run: pip install jax jaxlib # (optional import)
|
72 |
if: ${{ matrix.test-id == 'main' }}
|
73 |
- name: "Run JAX tests"
|
74 |
-
run: coverage run --append
|
75 |
if: ${{ matrix.test-id == 'main' }}
|
76 |
- name: "Install Torch"
|
77 |
run: pip install torch # (optional import)
|
78 |
if: ${{ matrix.test-id == 'main' }}
|
79 |
- name: "Run Torch tests"
|
80 |
-
run: coverage run --append
|
81 |
if: ${{ matrix.test-id == 'main' }}
|
82 |
-
- name: "Run custom env tests"
|
83 |
-
run: coverage run --append --source=pysr --omit='*/test/*,*/feynman_problems.py' -m pysr.test env
|
84 |
- name: "Coveralls"
|
85 |
env:
|
86 |
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
@@ -88,14 +94,26 @@ jobs:
|
|
88 |
COVERALLS_PARALLEL: true
|
89 |
run: coveralls --service=github
|
90 |
|
91 |
-
|
92 |
-
runs-on:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
93 |
steps:
|
94 |
- uses: actions/checkout@v4
|
95 |
-
-
|
96 |
-
|
97 |
-
|
98 |
-
|
|
|
|
|
|
|
99 |
|
100 |
conda_test:
|
101 |
runs-on: ${{ matrix.os }}
|
@@ -133,9 +151,9 @@ jobs:
|
|
133 |
- name: "Install PySR"
|
134 |
run: |
|
135 |
python3 -m pip install .
|
136 |
-
python3 -
|
137 |
- name: "Run tests"
|
138 |
-
run: cd /tmp && python -m pysr
|
139 |
|
140 |
coveralls:
|
141 |
name: Indicate completion to coveralls.io
|
@@ -177,9 +195,8 @@ jobs:
|
|
177 |
- name: "Install PySR and all dependencies"
|
178 |
run: |
|
179 |
python -m pip install --upgrade pip
|
180 |
-
|
181 |
-
|
182 |
-
python -m pip install .
|
183 |
- name: "Install additional dependencies"
|
184 |
run: python -m pip install jax jaxlib torch
|
185 |
if: ${{ matrix.python-version != '3.7' }}
|
|
|
24 |
test:
|
25 |
runs-on: ${{ matrix.os }}
|
26 |
timeout-minutes: 60
|
27 |
+
env:
|
28 |
+
COVERAGE_PROCESS_START: "${{ github.workspace }}/.coveragerc"
|
29 |
defaults:
|
30 |
run:
|
31 |
shell: bash
|
|
|
40 |
python-version: '3.7'
|
41 |
os: ubuntu-latest
|
42 |
test-id: include
|
43 |
+
- julia-version: '1'
|
44 |
+
python-version: '3.12'
|
45 |
+
os: ubuntu-latest
|
46 |
+
test-id: include
|
47 |
|
48 |
steps:
|
49 |
- uses: actions/checkout@v4
|
|
|
64 |
- name: "Install PySR"
|
65 |
run: |
|
66 |
python -m pip install --upgrade pip
|
67 |
+
pip install .
|
68 |
+
python -c 'import pysr'
|
69 |
+
- name: "Assert Julia version"
|
70 |
+
if: ${{ matrix.julia-version != '1'}}
|
71 |
+
run: python3 -c "from pysr import jl; assert jl.VERSION.major == jl.seval('v\"${{ matrix.julia-version }}\"').major; assert jl.VERSION.minor == jl.seval('v\"${{ matrix.julia-version }}\"').minor"
|
72 |
+
- name: "Install test dependencies"
|
73 |
+
run: pip install coverage coveralls pytest nbval
|
74 |
+
- name: "Set up coverage for subprocesses"
|
75 |
+
run: echo 'import coverage; coverage.process_startup()' > "${{ github.workspace }}/sitecustomize.py"
|
76 |
- name: "Run tests"
|
77 |
+
run: coverage run -m pysr test main,cli,startup
|
|
|
|
|
78 |
- name: "Install JAX"
|
79 |
run: pip install jax jaxlib # (optional import)
|
80 |
if: ${{ matrix.test-id == 'main' }}
|
81 |
- name: "Run JAX tests"
|
82 |
+
run: coverage run --append -m pysr test jax
|
83 |
if: ${{ matrix.test-id == 'main' }}
|
84 |
- name: "Install Torch"
|
85 |
run: pip install torch # (optional import)
|
86 |
if: ${{ matrix.test-id == 'main' }}
|
87 |
- name: "Run Torch tests"
|
88 |
+
run: coverage run --append -m pysr test torch
|
89 |
if: ${{ matrix.test-id == 'main' }}
|
|
|
|
|
90 |
- name: "Coveralls"
|
91 |
env:
|
92 |
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
|
|
94 |
COVERALLS_PARALLEL: true
|
95 |
run: coveralls --service=github
|
96 |
|
97 |
+
dev_install:
|
98 |
+
runs-on: ${{ matrix.os }}
|
99 |
+
strategy:
|
100 |
+
matrix:
|
101 |
+
os: ['ubuntu-latest']
|
102 |
+
python-version: ['3.11']
|
103 |
+
julia-version: ['1']
|
104 |
+
include:
|
105 |
+
- os: ubuntu-latest
|
106 |
+
python-version: '3.7'
|
107 |
+
julia-version: '1.6'
|
108 |
steps:
|
109 |
- uses: actions/checkout@v4
|
110 |
+
- uses: actions/setup-python@v5
|
111 |
+
- name: "Install PySR"
|
112 |
+
run: |
|
113 |
+
python -m pip install --upgrade pip
|
114 |
+
pip install .
|
115 |
+
- name: "Run development test"
|
116 |
+
run: PYSR_TEST_JULIA_VERSION=${{ matrix.julia-version }} PYSR_TEST_PYTHON_VERSION=${{ matrix.python-version }} python -m pysr test dev
|
117 |
|
118 |
conda_test:
|
119 |
runs-on: ${{ matrix.os }}
|
|
|
151 |
- name: "Install PySR"
|
152 |
run: |
|
153 |
python3 -m pip install .
|
154 |
+
python3 -c 'import pysr'
|
155 |
- name: "Run tests"
|
156 |
+
run: cd /tmp && python -m pysr test main
|
157 |
|
158 |
coveralls:
|
159 |
name: Indicate completion to coveralls.io
|
|
|
195 |
- name: "Install PySR and all dependencies"
|
196 |
run: |
|
197 |
python -m pip install --upgrade pip
|
198 |
+
pip install .
|
199 |
+
pip install mypy
|
|
|
200 |
- name: "Install additional dependencies"
|
201 |
run: python -m pip install jax jaxlib torch
|
202 |
if: ${{ matrix.python-version != '3.7' }}
|
.github/workflows/CI_Windows.yml
CHANGED
@@ -52,16 +52,13 @@ jobs:
|
|
52 |
- name: "Install PySR"
|
53 |
run: |
|
54 |
python -m pip install --upgrade pip
|
55 |
-
pip install
|
56 |
-
|
57 |
-
python -
|
58 |
- name: "Run tests"
|
59 |
run: |
|
60 |
-
python -m pysr
|
61 |
-
python -m pysr.test cli
|
62 |
- name: "Install Torch"
|
63 |
run: pip install torch # (optional import)
|
64 |
- name: "Run Torch tests"
|
65 |
-
run: python -m pysr
|
66 |
-
- name: "Run custom env tests"
|
67 |
-
run: python -m pysr.test env
|
|
|
52 |
- name: "Install PySR"
|
53 |
run: |
|
54 |
python -m pip install --upgrade pip
|
55 |
+
pip install pytest nbval
|
56 |
+
pip install .
|
57 |
+
python -c 'import pysr'
|
58 |
- name: "Run tests"
|
59 |
run: |
|
60 |
+
python -m pysr test main,cli,startup
|
|
|
61 |
- name: "Install Torch"
|
62 |
run: pip install torch # (optional import)
|
63 |
- name: "Run Torch tests"
|
64 |
+
run: python -m pysr test torch
|
|
|
|
.github/workflows/CI_conda_forge.yml
CHANGED
@@ -40,4 +40,6 @@ jobs:
|
|
40 |
run: conda activate pysr-test && conda install pysr
|
41 |
if: ${{ !matrix.use-mamba }}
|
42 |
- name: "Run tests"
|
43 |
-
run:
|
|
|
|
|
|
40 |
run: conda activate pysr-test && conda install pysr
|
41 |
if: ${{ !matrix.use-mamba }}
|
42 |
- name: "Run tests"
|
43 |
+
run: |
|
44 |
+
pip install pytest nbval
|
45 |
+
python -m pysr test main,startup
|
.github/workflows/CI_docker.yml
CHANGED
@@ -37,4 +37,4 @@ jobs:
|
|
37 |
- name: Build docker
|
38 |
run: docker build --platform=${{ matrix.arch }} -t pysr .
|
39 |
- name: Test docker
|
40 |
-
run: docker run --platform=${{ matrix.arch }} --rm pysr /bin/bash -c '
|
|
|
37 |
- name: Build docker
|
38 |
run: docker build --platform=${{ matrix.arch }} -t pysr .
|
39 |
- name: Test docker
|
40 |
+
run: docker run --platform=${{ matrix.arch }} --rm pysr /bin/bash -c 'pip install pytest nbval && python3 -m pysr test main,cli,startup'
|
.github/workflows/CI_docker_large_nightly.yml
CHANGED
@@ -33,4 +33,4 @@ jobs:
|
|
33 |
- name: Build docker
|
34 |
run: docker build --platform=${{ matrix.arch }} -t pysr --build-arg JLVERSION=${{ matrix.julia-version }} --build-arg PYVERSION=${{ matrix.python-version }} .
|
35 |
- name: Test docker
|
36 |
-
run: docker run --platform=${{ matrix.arch }} --rm pysr /bin/bash -c '
|
|
|
33 |
- name: Build docker
|
34 |
run: docker build --platform=${{ matrix.arch }} -t pysr --build-arg JLVERSION=${{ matrix.julia-version }} --build-arg PYVERSION=${{ matrix.python-version }} .
|
35 |
- name: Test docker
|
36 |
+
run: docker run --platform=${{ matrix.arch }} --rm pysr /bin/bash -c 'pip install pytest nbval && python3 -m pysr test main,cli,startup'
|
.github/workflows/CI_large_nightly.yml
CHANGED
@@ -40,13 +40,11 @@ jobs:
|
|
40 |
- name: "Install PySR"
|
41 |
run: |
|
42 |
python -m pip install --upgrade pip
|
43 |
-
pip install
|
44 |
-
|
45 |
-
python -
|
|
|
|
|
|
|
46 |
- name: "Run tests"
|
47 |
-
run:
|
48 |
-
python -m pysr.test main
|
49 |
-
python -m pysr.test cli
|
50 |
-
- name: "Run new env test"
|
51 |
-
run: python -m pysr.test env
|
52 |
-
if: ${{ !(matrix.os == 'windows-latest' && matrix.python-version == '3.7') }}
|
|
|
40 |
- name: "Install PySR"
|
41 |
run: |
|
42 |
python -m pip install --upgrade pip
|
43 |
+
pip install pytest nbval
|
44 |
+
pip install .
|
45 |
+
python -c 'import pysr'
|
46 |
+
- name: "Assert Julia version"
|
47 |
+
if: ${{ matrix.julia-version != '1'}}
|
48 |
+
run: python3 -c "from pysr import jl; assert jl.VERSION.major == jl.seval('v\"${{ matrix.julia-version }}\"').major; assert jl.VERSION.minor == jl.seval('v\"${{ matrix.julia-version }}\"').minor"
|
49 |
- name: "Run tests"
|
50 |
+
run: python -m pysr test main,cli,startup
|
|
|
|
|
|
|
|
|
|
.github/workflows/CI_mac.yml
CHANGED
@@ -52,20 +52,17 @@ jobs:
|
|
52 |
- name: "Install PySR"
|
53 |
run: |
|
54 |
python -m pip install --upgrade pip
|
55 |
-
pip install
|
56 |
-
|
57 |
-
python -
|
58 |
- name: "Run tests"
|
59 |
run: |
|
60 |
-
python -m pysr
|
61 |
-
python -m pysr.test cli
|
62 |
- name: "Install JAX"
|
63 |
run: pip install jax jaxlib # (optional import)
|
64 |
- name: "Run JAX tests"
|
65 |
-
run: python -m pysr
|
66 |
- name: "Install Torch"
|
67 |
run: pip install torch # (optional import)
|
68 |
- name: "Run Torch tests"
|
69 |
-
run: python -m pysr
|
70 |
-
- name: "Run custom env tests"
|
71 |
-
run: python -m pysr.test env
|
|
|
52 |
- name: "Install PySR"
|
53 |
run: |
|
54 |
python -m pip install --upgrade pip
|
55 |
+
pip install pytest nbval
|
56 |
+
pip install .
|
57 |
+
python -c 'import pysr'
|
58 |
- name: "Run tests"
|
59 |
run: |
|
60 |
+
python -m pysr test main,cli,startup
|
|
|
61 |
- name: "Install JAX"
|
62 |
run: pip install jax jaxlib # (optional import)
|
63 |
- name: "Run JAX tests"
|
64 |
+
run: python -m pysr test jax
|
65 |
- name: "Install Torch"
|
66 |
run: pip install torch # (optional import)
|
67 |
- name: "Run Torch tests"
|
68 |
+
run: python -m pysr test torch
|
|
|
|
.pre-commit-config.yaml
CHANGED
@@ -13,11 +13,13 @@ repos:
|
|
13 |
hooks:
|
14 |
- id: black
|
15 |
- id: black-jupyter
|
|
|
16 |
# Stripping notebooks
|
17 |
- repo: https://github.com/kynan/nbstripout
|
18 |
rev: 0.6.1
|
19 |
hooks:
|
20 |
- id: nbstripout
|
|
|
21 |
# Unused imports
|
22 |
- repo: https://github.com/hadialqattan/pycln
|
23 |
rev: "v2.4.0"
|
|
|
13 |
hooks:
|
14 |
- id: black
|
15 |
- id: black-jupyter
|
16 |
+
exclude: pysr/test/test_nb.ipynb
|
17 |
# Stripping notebooks
|
18 |
- repo: https://github.com/kynan/nbstripout
|
19 |
rev: 0.6.1
|
20 |
hooks:
|
21 |
- id: nbstripout
|
22 |
+
exclude: pysr/test/test_nb.ipynb
|
23 |
# Unused imports
|
24 |
- repo: https://github.com/hadialqattan/pycln
|
25 |
rev: "v2.4.0"
|
CONTRIBUTORS.md
CHANGED
@@ -42,7 +42,7 @@ Scan through our [existing issues](https://github.com/MilesCranmer/PySR/issues)
|
|
42 |
check out the [guide](https://astroautomata.com/PySR/backend/) on modifying a custom SymbolicRegression.jl library.
|
43 |
In this case, you might instead be interested in making suggestions to the [SymbolicRegression.jl](http://github.com/MilesCranmer/SymbolicRegression.jl) library.
|
44 |
|
45 |
-
4. You can install your local version of PySR with `python setup.py install`, and run tests with `python -m pysr
|
46 |
|
47 |
### Commit your update
|
48 |
|
|
|
42 |
check out the [guide](https://astroautomata.com/PySR/backend/) on modifying a custom SymbolicRegression.jl library.
|
43 |
In this case, you might instead be interested in making suggestions to the [SymbolicRegression.jl](http://github.com/MilesCranmer/SymbolicRegression.jl) library.
|
44 |
|
45 |
+
4. You can install your local version of PySR with `python setup.py install`, and run tests with `python -m pysr test main`.
|
46 |
|
47 |
### Commit your update
|
48 |
|
Dockerfile
CHANGED
@@ -13,22 +13,23 @@ COPY --from=jl /usr/local/julia /usr/local/julia
|
|
13 |
ENV PATH="/usr/local/julia/bin:${PATH}"
|
14 |
|
15 |
# Install IPython and other useful libraries:
|
16 |
-
RUN pip install ipython matplotlib
|
17 |
|
18 |
WORKDIR /pysr
|
19 |
|
20 |
# Caches install (https://stackoverflow.com/questions/25305788/how-to-avoid-reinstalling-packages-when-building-docker-image-for-python-project)
|
21 |
ADD ./requirements.txt /pysr/requirements.txt
|
22 |
-
RUN pip3 install -r /pysr/requirements.txt
|
23 |
|
24 |
# Install PySR:
|
25 |
# We do a minimal copy so it doesn't need to rerun at every file change:
|
|
|
26 |
ADD ./setup.py /pysr/setup.py
|
27 |
-
ADD ./pysr
|
28 |
-
RUN pip3 install .
|
29 |
|
30 |
# Install Julia pre-requisites:
|
31 |
-
RUN python3 -
|
32 |
|
33 |
# metainformation
|
34 |
LABEL org.opencontainers.image.authors = "Miles Cranmer"
|
|
|
13 |
ENV PATH="/usr/local/julia/bin:${PATH}"
|
14 |
|
15 |
# Install IPython and other useful libraries:
|
16 |
+
RUN pip install --no-cache-dir ipython matplotlib
|
17 |
|
18 |
WORKDIR /pysr
|
19 |
|
20 |
# Caches install (https://stackoverflow.com/questions/25305788/how-to-avoid-reinstalling-packages-when-building-docker-image-for-python-project)
|
21 |
ADD ./requirements.txt /pysr/requirements.txt
|
22 |
+
RUN pip3 install --no-cache-dir -r /pysr/requirements.txt
|
23 |
|
24 |
# Install PySR:
|
25 |
# We do a minimal copy so it doesn't need to rerun at every file change:
|
26 |
+
ADD ./pyproject.toml /pysr/pyproject.toml
|
27 |
ADD ./setup.py /pysr/setup.py
|
28 |
+
ADD ./pysr /pysr/pysr
|
29 |
+
RUN pip3 install --no-cache-dir .
|
30 |
|
31 |
# Install Julia pre-requisites:
|
32 |
+
RUN python3 -c 'import pysr'
|
33 |
|
34 |
# metainformation
|
35 |
LABEL org.opencontainers.image.authors = "Miles Cranmer"
|
README.md
CHANGED
@@ -27,10 +27,6 @@ If you've finished a project with PySR, please submit a PR to showcase your work
|
|
27 |
- [Contributors](#contributors-)
|
28 |
- [Why PySR?](#why-pysr)
|
29 |
- [Installation](#installation)
|
30 |
-
- [pip](#pip)
|
31 |
-
- [conda](#conda)
|
32 |
-
- [docker](#docker-build)
|
33 |
-
- [Troubleshooting](#troubleshooting)
|
34 |
- [Quickstart](#quickstart)
|
35 |
- [→ Documentation](https://astroautomata.com/PySR)
|
36 |
|
@@ -129,48 +125,31 @@ an explicit and powerful way to interpret deep neural networks.
|
|
129 |
|
130 |
## Installation
|
131 |
|
132 |
-
|
133 |
-
|:---:|:---:|:---:|
|
134 |
-
| Everywhere (recommended) | Linux and Intel-based macOS | Everywhere (if all else fails) |
|
135 |
|
136 |
-
|
137 |
-
|
138 |
-
### pip
|
139 |
|
140 |
-
1. [Install Julia](https://julialang.org/downloads/)
|
141 |
-
- Alternatively, my personal preference is to use [juliaup](https://github.com/JuliaLang/juliaup#installation), which performs this automatically.
|
142 |
-
2. Then, run:
|
143 |
```bash
|
144 |
-
|
145 |
```
|
146 |
-
3. Finally, to install Julia dependencies:
|
147 |
-
```bash
|
148 |
-
python3 -m pysr install
|
149 |
-
```
|
150 |
-
> (Alternatively, from within Python, you can call `import pysr; pysr.install()`)
|
151 |
|
152 |
-
|
153 |
|
154 |
-
###
|
155 |
|
156 |
-
|
157 |
|
158 |
```bash
|
159 |
conda install -c conda-forge pysr
|
160 |
```
|
161 |
|
162 |
-
from within your target conda environment.
|
163 |
|
164 |
-
|
165 |
-
start time may be slightly slower as the JIT-compilation will be running.
|
166 |
-
(Once the compilation finishes, there will not be a performance difference though.)
|
167 |
-
|
168 |
-
---
|
169 |
|
170 |
-
|
171 |
|
172 |
1. Clone this repo.
|
173 |
-
2.
|
174 |
```bash
|
175 |
docker build -t pysr .
|
176 |
```
|
@@ -185,11 +164,7 @@ For more details, see the [docker section](#docker).
|
|
185 |
|
186 |
### Troubleshooting
|
187 |
|
188 |
-
|
189 |
-
To debug this, try running `python3 -c 'import os; print(os.environ["PATH"])'`.
|
190 |
-
If none of these folders contain your Julia binary, then you need to add Julia's `bin` folder to your `PATH` environment variable.
|
191 |
-
|
192 |
-
Another issue you might run into can result in a hard crash at import with
|
193 |
a message like "`GLIBCXX_...` not found". This is due to another one of the Python dependencies
|
194 |
loading an incorrect `libstdc++` library. To fix this, you should modify your
|
195 |
`LD_LIBRARY_PATH` variable to reference the Julia libraries. For example, if the Julia
|
@@ -202,7 +177,6 @@ export LD_LIBRARY_PATH=$HOME/.julia/juliaup/julia-1.10.0+0.x64.linux.gnu/lib/jul
|
|
202 |
|
203 |
to your `.bashrc` or `.zshrc` file.
|
204 |
|
205 |
-
**Running PySR on macOS with an M1 processor:** you should use the pip version, and make sure to get the Julia binary for ARM/M-series processors.
|
206 |
|
207 |
## Quickstart
|
208 |
|
@@ -240,7 +214,7 @@ model = PySRRegressor(
|
|
240 |
],
|
241 |
extra_sympy_mappings={"inv": lambda x: 1 / x},
|
242 |
# ^ Define operator for SymPy as well
|
243 |
-
|
244 |
# ^ Custom loss function (julia syntax)
|
245 |
)
|
246 |
```
|
@@ -323,7 +297,7 @@ model = PySRRegressor(
|
|
323 |
# ^ 2 populations per core, so one is always running.
|
324 |
population_size=50,
|
325 |
# ^ Slightly larger populations, for greater diversity.
|
326 |
-
|
327 |
# ^ Generations between migrations.
|
328 |
niterations=10000000, # Run forever
|
329 |
early_stop_condition=(
|
|
|
27 |
- [Contributors](#contributors-)
|
28 |
- [Why PySR?](#why-pysr)
|
29 |
- [Installation](#installation)
|
|
|
|
|
|
|
|
|
30 |
- [Quickstart](#quickstart)
|
31 |
- [→ Documentation](https://astroautomata.com/PySR)
|
32 |
|
|
|
125 |
|
126 |
## Installation
|
127 |
|
128 |
+
### Pip
|
|
|
|
|
129 |
|
130 |
+
You can install PySR with pip:
|
|
|
|
|
131 |
|
|
|
|
|
|
|
132 |
```bash
|
133 |
+
pip install pysr
|
134 |
```
|
|
|
|
|
|
|
|
|
|
|
135 |
|
136 |
+
Julia dependencies will be installed at first import.
|
137 |
|
138 |
+
### Conda
|
139 |
|
140 |
+
Similarly, with conda:
|
141 |
|
142 |
```bash
|
143 |
conda install -c conda-forge pysr
|
144 |
```
|
145 |
|
|
|
146 |
|
147 |
+
### Docker
|
|
|
|
|
|
|
|
|
148 |
|
149 |
+
You can also use the `Dockerfile` to install PySR in a docker container
|
150 |
|
151 |
1. Clone this repo.
|
152 |
+
2. Within the repo's directory, build the docker container:
|
153 |
```bash
|
154 |
docker build -t pysr .
|
155 |
```
|
|
|
164 |
|
165 |
### Troubleshooting
|
166 |
|
167 |
+
One issue you might run into can result in a hard crash at import with
|
|
|
|
|
|
|
|
|
168 |
a message like "`GLIBCXX_...` not found". This is due to another one of the Python dependencies
|
169 |
loading an incorrect `libstdc++` library. To fix this, you should modify your
|
170 |
`LD_LIBRARY_PATH` variable to reference the Julia libraries. For example, if the Julia
|
|
|
177 |
|
178 |
to your `.bashrc` or `.zshrc` file.
|
179 |
|
|
|
180 |
|
181 |
## Quickstart
|
182 |
|
|
|
214 |
],
|
215 |
extra_sympy_mappings={"inv": lambda x: 1 / x},
|
216 |
# ^ Define operator for SymPy as well
|
217 |
+
elementwise_loss="loss(prediction, target) = (prediction - target)^2",
|
218 |
# ^ Custom loss function (julia syntax)
|
219 |
)
|
220 |
```
|
|
|
297 |
# ^ 2 populations per core, so one is always running.
|
298 |
population_size=50,
|
299 |
# ^ Slightly larger populations, for greater diversity.
|
300 |
+
ncycles_per_iteration=500,
|
301 |
# ^ Generations between migrations.
|
302 |
niterations=10000000, # Run forever
|
303 |
early_stop_condition=(
|
TODO.md
DELETED
@@ -1,142 +0,0 @@
|
|
1 |
-
# TODO
|
2 |
-
|
3 |
-
- [x] Async threading, and have a server of equations. So that threads aren't waiting for others to finish.
|
4 |
-
- [x] Print out speed of equation evaluation over time. Measure time it takes per cycle
|
5 |
-
- [x] Add ability to pass an operator as an anonymous function string. E.g., `binary_operators=["g(x, y) = x+y"]`.
|
6 |
-
- [x] Add error bar capability (thanks Johannes Buchner for suggestion)
|
7 |
-
- [x] Why don't the constants continually change? It should optimize them every time the equation appears.
|
8 |
-
- Restart the optimizer to help with this.
|
9 |
-
- [x] Add several common unary and binary operators; list these.
|
10 |
-
- [x] Try other initial conditions for optimizer
|
11 |
-
- [x] Make scaling of changes to constant a hyperparameter
|
12 |
-
- [x] Make deletion op join deleted subtree to parent
|
13 |
-
- [x] Update hall of fame every iteration?
|
14 |
-
- Seems to overfit early if we do this.
|
15 |
-
- [x] Consider adding mutation to pass an operator in through a new binary operator (e.g., exp(x3)->plus(exp(x3), ...))
|
16 |
-
- (Added full insertion operator
|
17 |
-
- [x] Add a node at the top of a tree
|
18 |
-
- [x] Insert a node at the top of a subtree
|
19 |
-
- [x] Record very best individual in each population, and return at end.
|
20 |
-
- [x] Write our own tree copy operation; deepcopy() is the slowest operation by far.
|
21 |
-
- [x] Hyperparameter tune
|
22 |
-
- [x] Create a benchmark for accuracy
|
23 |
-
- [x] Add interface for either defining an operation to learn, or loading in arbitrary dataset.
|
24 |
-
- Could just write out the dataset in julia, or load it.
|
25 |
-
- [x] Create a Python interface
|
26 |
-
- [x] Explicit constant optimization on hall-of-fame
|
27 |
-
- Create method to find and return all constants, from left to right
|
28 |
-
- Create method to find and set all constants, in same order
|
29 |
-
- Pull up some optimization algorithm and add it. Keep the package small!
|
30 |
-
- [x] Create a benchmark for speed
|
31 |
-
- [x] Simplify subtrees with only constants beneath them. Or should I? Maybe randomly simplify sometimes?
|
32 |
-
- [x] Record hall of fame
|
33 |
-
- [x] Optionally (with hyperparameter) migrate the hall of fame, rather than current bests
|
34 |
-
- [x] Test performance of reduced precision integers
|
35 |
-
- No effect
|
36 |
-
- [x] Create struct to pass through all hyperparameters, instead of treating as constants
|
37 |
-
- Make sure doesn't affect performance
|
38 |
-
- [x] Rename package to avoid trademark issues
|
39 |
-
- PySR?
|
40 |
-
- [x] Put on PyPI
|
41 |
-
- [x] Treat baseline as a solution.
|
42 |
-
- [x] Print score alongside MSE: \delta \log(MSE)/\delta \log(complexity)
|
43 |
-
- [x] Calculating the loss function - there is duplicate calculations happening.
|
44 |
-
- [x] Declaration of the weights array every iteration
|
45 |
-
- [x] Sympy evaluation
|
46 |
-
- [x] Threaded recursion
|
47 |
-
- [x] Test suite
|
48 |
-
- [x] Performance: - Use an enum for functions instead of storing them?
|
49 |
-
- Gets ~40% speedup on small test.
|
50 |
-
- [x] Use @fastmath
|
51 |
-
- [x] Try @spawn over each sub-population. Do random sort, compute mutation for each, then replace 10% oldest.
|
52 |
-
- [x] Control max depth, rather than max number of nodes?
|
53 |
-
- [x] Allow user to pass names for variables - use these when printing
|
54 |
-
- [x] Check for domain errors in an equation quickly before actually running the entire array over it. (We do this now recursively - every single equation is checked for nans/infs when being computed.)
|
55 |
-
- [x] read the docs page
|
56 |
-
- [x] Create backup csv file so always something to copy from for `PySR`. Also use random hall of fame file by default. Call function to read from csv after running, so dont need to run again. Dump scores alongside MSE to .csv (and return with Pandas).
|
57 |
-
- [x] Better cleanup of zombie processes after <ctl-c>
|
58 |
-
- [x] Consider printing output sorted by score, not by complexity.
|
59 |
-
- [x] Increase max complexity slowly over time up to the actual max.
|
60 |
-
- [x] Record density over complexity. Favor equations that have a density we have not explored yet. Want the final density to be evenly distributed.
|
61 |
-
- [x] Do printing from Python side. Then we can do simplification and pretty-printing.
|
62 |
-
- [x] Sympy printing
|
63 |
-
- [x] Store Project.toml inside PySR's python code, rather than copied to site-packages.
|
64 |
-
- [ ] Sort these todo lists by priority
|
65 |
-
|
66 |
-
- [ ] Automatically convert log, log10, log2, pow to the correct operators.
|
67 |
-
- [ ] I think the simplification isn't working correctly (post-merging SymbolicUtils.)
|
68 |
-
- [ ] Show demo of PySRRegressor. Fit equations, then show how to view equations.
|
69 |
-
- [ ] Add "selected" column string to regular equations dict.
|
70 |
-
- [ ] List "Loss" instead of "MSE"
|
71 |
-
|
72 |
-
## Feature ideas
|
73 |
-
|
74 |
-
- [ ] Other default losses (e.g., abs, other likelihoods, or just allow user to pass this as a string).
|
75 |
-
- [ ] Other dtypes available
|
76 |
-
- [ ] NDSA-II
|
77 |
-
- [ ] Cross-validation
|
78 |
-
- [ ] Hierarchical model, so can re-use functional forms. Output of one equation goes into second equation?
|
79 |
-
- [ ] Add function to plot equations
|
80 |
-
- [ ] Refresh screen rather than dumping to stdout?
|
81 |
-
- [ ] Add ability to save state from python
|
82 |
-
- [ ] Additional degree operators?
|
83 |
-
- [ ] Multi targets (vector ops). Idea 1: Node struct contains argument for which registers it is applied to. Then, can work with multiple components simultaneously. Though this may be tricky to get right. Idea 2: each op is defined by input/output space. Some operators are flexible, and the spaces should be adjusted automatically. Otherwise, only consider ops that make a tree possible. But will need additional ops here to get it to work. Idea 3: define each equation in 2 parts: one part that is shared between all outputs, and one that is different between all outputs. Maybe this could be an array of nodes corresponding to each output. And those nodes would define their functions.
|
84 |
-
- Much easier option: simply flatten the output vector, and set the index as another input feature. The equation learned will be a single equation containing indices as a feature.
|
85 |
-
- [ ] Tree crossover? I.e., can take as input a part of the same equation, so long as it is the same level or below?
|
86 |
-
- [ ] Create flexible way of providing "simplification recipes." I.e., plus(plus(T, C), C) => plus(T, +(C, C)). The user could pass these.
|
87 |
-
- [ ] Consider allowing multi-threading turned off, for faster testing (cache issue on travis). Or could simply fix the caching issue there.
|
88 |
-
- [ ] Consider returning only the equation of interest; rather than all equations.
|
89 |
-
- [ ] Enable derivative operators. These would differentiate their right argument wrt their left argument, some input variable.
|
90 |
-
|
91 |
-
## Algorithmic performance ideas:
|
92 |
-
|
93 |
-
|
94 |
-
- [ ] Use package compiler and compile sr.jl into a standalone binary that can be used by pysr.
|
95 |
-
- [ ] When doing equation warmup, only migrate those equations with almost the same complexity. Rather than having to consider simple equations later in the game.
|
96 |
-
- [ ] Right now we only update the score based on some. Need to update score based on entire data! Note that optimizer only is used sometimes.
|
97 |
-
- [ ] Idea: use gradient of equation with respect to each operator (perhaps simply add to each operator) to tell which part is the most "sensitive" to changes. Then, perhaps insert/delete/mutate on that part of the tree?
|
98 |
-
- [ ] Start populations staggered; so that there is more frequent printing (and pops that start a bit later get hall of fame already)?
|
99 |
-
- [ ] Consider adding mutation for constant<->variable
|
100 |
-
- [ ] Implement more parts of the original Eureqa algorithms: https://www.creativemachineslab.com/eureqa.html
|
101 |
-
- [ ] Experiment with freezing parts of model; then we only append/delete at end of tree.
|
102 |
-
- [ ] Use NN to generate weights over all probability distribution conditional on error and existing equation, and train on some randomly-generated equations
|
103 |
-
- [ ] For hierarchical idea: after running some number of iterations, do a search for "most common pattern". Then, turn that subtree into its own operator.
|
104 |
-
- [ ] Calculate feature importances based on features we've already seen, then weight those features up in all random generations.
|
105 |
-
- [ ] Calculate feature importances of future mutations, by looking at correlation between residual of model, and the features.
|
106 |
-
- Store feature importances of future, and periodically update it.
|
107 |
-
- [ ] Punish depth rather than size, as depth really hurts during optimization.
|
108 |
-
|
109 |
-
|
110 |
-
## Code performance ideas:
|
111 |
-
|
112 |
-
- [ ] How hard is it to turn the recursive array evaluation into a for loop?
|
113 |
-
- [ ] Try defining a binary tree as an array, rather than a linked list. See https://stackoverflow.com/a/6384714/2689923
|
114 |
-
- in array branch
|
115 |
-
- [ ] Add true multi-node processing, with MPI, or just file sharing. Multiple populations per core.
|
116 |
-
- Ongoing in cluster branch
|
117 |
-
- [ ] Performance: try inling things?
|
118 |
-
- [ ] Try storing things like number nodes in a tree; then can iterate instead of counting
|
119 |
-
|
120 |
-
```julia
|
121 |
-
mutable struct Tree
|
122 |
-
degree::Array{Integer, 1}
|
123 |
-
val::Array{Float32, 1}
|
124 |
-
constant::Array{Bool, 1}
|
125 |
-
op::Array{Integer, 1}
|
126 |
-
Tree(s::Integer) = new(zeros(Integer, s), zeros(Float32, s), zeros(Bool, s), zeros(Integer, s))
|
127 |
-
end
|
128 |
-
```
|
129 |
-
|
130 |
-
- Then, we could even work with trees on the GPU, since they are all pre-allocated arrays.
|
131 |
-
- A population could be a Tree, but with degree 2 on all the degrees. So a slice of population arrays forms a tree.
|
132 |
-
- How many operations can we do via matrix ops? Mutate node=>easy.
|
133 |
-
- Can probably batch and do many operations at once across a population.
|
134 |
-
- Or, across all populations! Mutate operator: index 2D array and set it to random vector? But the indexing might hurt.
|
135 |
-
- The big advantage: can evaluate all new mutated trees at once; as massive matrix operation.
|
136 |
-
- Can control depth, rather than maxsize. Then just pretend all trees are full and same depth. Then we really don't need to care about depth.
|
137 |
-
|
138 |
-
- [ ] Can we cache calculations, or does the compiler do that? E.g., I should only have to run exp(x0) once; after that it should be read from memory.
|
139 |
-
- Done on caching branch. Currently am finding that this is quiet slow (presumably because memory allocation is the main issue).
|
140 |
-
- [ ] Add GPU capability?
|
141 |
-
- Not sure if possible, as binary trees are the real bottleneck.
|
142 |
-
- Could generate on CPU, evaluate score on GPU?
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
datasets/FeynmanEquations.csv
DELETED
@@ -1,101 +0,0 @@
|
|
1 |
-
Filename,datapoints,Number,Output,Formula,# variables,v1_name,v1_low,v1_high,v2_name,v2_low,v2_high,v3_name,v3_low,v3_high,v4_name,v4_low,v4_high,v5_name,v5_low,v5_high,v6_name,v6_low,v6_high,v7_name,v7_low,v7_high,v8_name,v8_low,v8_high,v9_name,v9_low,v9_high,v10_name,v10_low,v10_high
|
2 |
-
I.6.2a,10,1,f,exp(-theta**2/2)/sqrt(2*pi),1,theta,1,3,,,,,,,,,,,,,,,,,,,,,,,,,,,
|
3 |
-
I.6.2,100,2,f,exp(-(theta/sigma)**2/2)/(sqrt(2*pi)*sigma),2,sigma,1,3,theta,1,3,,,,,,,,,,,,,,,,,,,,,,,,
|
4 |
-
I.6.2b,1000,3,f,exp(-((theta-theta1)/sigma)**2/2)/(sqrt(2*pi)*sigma),3,sigma,1,3,theta,1,3,theta1,1,3,,,,,,,,,,,,,,,,,,,,,
|
5 |
-
I.8.14,100,4,d,sqrt((x2-x1)**2+(y2-y1)**2),4,x1,1,5,x2,1,5,y1,1,5,y2,1,5,,,,,,,,,,,,,,,,,,
|
6 |
-
I.9.18,1000000,5,F,G*m1*m2/((x2-x1)**2+(y2-y1)**2+(z2-z1)**2),9,m1,1,2,m2,1,2,G,1,2,x1,3,4,x2,1,2,y1,3,4,y2,1,2,z1,3,4,z2,1,2,,,
|
7 |
-
I.10.7,10,6,m,m_0/sqrt(1-v**2/c**2),3,m_0,1,5,v,1,2,c,3,10,,,,,,,,,,,,,,,,,,,,,
|
8 |
-
I.11.19,100,7,A,x1*y1+x2*y2+x3*y3,6,x1,1,5,x2,1,5,x3,1,5,y1,1,5,y2,1,5,y3,1,5,,,,,,,,,,,,
|
9 |
-
I.12.1,10,8,F,mu*Nn,2,mu,1,5,Nn,1,5,,,,,,,,,,,,,,,,,,,,,,,,
|
10 |
-
I.12.2,10,10,F,q1*q2*r/(4*pi*epsilon*r**3),4,q1,1,5,q2,1,5,epsilon,1,5,r,1,5,,,,,,,,,,,,,,,,,,
|
11 |
-
I.12.4,10,11,Ef,q1*r/(4*pi*epsilon*r**3),3,q1,1,5,epsilon,1,5,r,1,5,,,,,,,,,,,,,,,,,,,,,
|
12 |
-
I.12.5,10,12,F,q2*Ef,2,q2,1,5,Ef,1,5,,,,,,,,,,,,,,,,,,,,,,,,
|
13 |
-
I.12.11,10,13,F,q*(Ef+B*v*sin(theta)),5,q,1,5,Ef,1,5,B,1,5,v,1,5,theta,1,5,,,,,,,,,,,,,,,
|
14 |
-
I.13.4,10,9,K,1/2*m*(v**2+u**2+w**2),4,m,1,5,v,1,5,u,1,5,w,1,5,,,,,,,,,,,,,,,,,,
|
15 |
-
I.13.12,10,14,U,G*m1*m2*(1/r2-1/r1),5,m1,1,5,m2,1,5,r1,1,5,r2,1,5,G,1,5,,,,,,,,,,,,,,,
|
16 |
-
I.14.3,10,15,U,m*g*z,3,m,1,5,g,1,5,z,1,5,,,,,,,,,,,,,,,,,,,,,
|
17 |
-
I.14.4,10,16,U,1/2*k_spring*x**2,2,k_spring,1,5,x,1,5,,,,,,,,,,,,,,,,,,,,,,,,
|
18 |
-
I.15.3x,10,17,x1,(x-u*t)/sqrt(1-u**2/c**2),4,x,5,10,u,1,2,c,3,20,t,1,2,,,,,,,,,,,,,,,,,,
|
19 |
-
I.15.3t,100,18,t1,(t-u*x/c**2)/sqrt(1-u**2/c**2),4,x,1,5,c,3,10,u,1,2,t,1,5,,,,,,,,,,,,,,,,,,
|
20 |
-
I.15.1,10,19,p,m_0*v/sqrt(1-v**2/c**2),3,m_0,1,5,v,1,2,c,3,10,,,,,,,,,,,,,,,,,,,,,
|
21 |
-
I.16.6,10,20,v1,(u+v)/(1+u*v/c**2),3,c,1,5,v,1,5,u,1,5,,,,,,,,,,,,,,,,,,,,,
|
22 |
-
I.18.4,10,21,r,(m1*r1+m2*r2)/(m1+m2),4,m1,1,5,m2,1,5,r1,1,5,r2,1,5,,,,,,,,,,,,,,,,,,
|
23 |
-
I.18.12,10,22,tau,r*F*sin(theta),3,r,1,5,F,1,5,theta,0,5,,,,,,,,,,,,,,,,,,,,,
|
24 |
-
I.18.14,10,23,L,m*r*v*sin(theta),4,m,1,5,r,1,5,v,1,5,theta,1,5,,,,,,,,,,,,,,,,,,
|
25 |
-
I.24.6,10,24,E_n,1/2*m*(omega**2+omega_0**2)*1/2*x**2,4,m,1,3,omega,1,3,omega_0,1,3,x,1,3,,,,,,,,,,,,,,,,,,
|
26 |
-
I.25.13,10,25,Volt,q/C,2,q,1,5,C,1,5,,,,,,,,,,,,,,,,,,,,,,,,
|
27 |
-
I.26.2,100,26,theta1,arcsin(n*sin(theta2)),2,n,0,1,theta2,1,5,,,,,,,,,,,,,,,,,,,,,,,,
|
28 |
-
I.27.6,10,27,foc,1/(1/d1+n/d2),3,d1,1,5,d2,1,5,n,1,5,,,,,,,,,,,,,,,,,,,,,
|
29 |
-
I.29.4,10,28,k,omega/c,2,omega,1,10,c,1,10,,,,,,,,,,,,,,,,,,,,,,,,
|
30 |
-
I.29.16,1000,29,x,sqrt(x1**2+x2**2-2*x1*x2*cos(theta1-theta2)),4,x1,1,5,x2,1,5,theta1,1,5,theta2,1,5,,,,,,,,,,,,,,,,,,
|
31 |
-
I.30.3,100,30,Int,Int_0*sin(n*theta/2)**2/sin(theta/2)**2,3,Int_0,1,5,theta,1,5,n,1,5,,,,,,,,,,,,,,,,,,,,,
|
32 |
-
I.30.5,100,31,theta,arcsin(lambd/(n*d)),3,lambd,1,2,d,2,5,n,1,5,,,,,,,,,,,,,,,,,,,,,
|
33 |
-
I.32.5,10,32,Pwr,q**2*a**2/(6*pi*epsilon*c**3),4,q,1,5,a,1,5,epsilon,1,5,c,1,5,,,,,,,,,,,,,,,,,,
|
34 |
-
I.32.17,10,33,Pwr,(1/2*epsilon*c*Ef**2)*(8*pi*r**2/3)*(omega**4/(omega**2-omega_0**2)**2),6,epsilon,1,2,c,1,2,Ef,1,2,r,1,2,omega,1,2,omega_0,3,5,,,,,,,,,,,,
|
35 |
-
I.34.8,10,34,omega,q*v*B/p,4,q,1,5,v,1,5,B,1,5,p,1,5,,,,,,,,,,,,,,,,,,
|
36 |
-
I.34.1,10,35,omega,omega_0/(1-v/c),3,c,3,10,v,1,2,omega_0,1,5,,,,,,,,,,,,,,,,,,,,,
|
37 |
-
I.34.14,10,36,omega,(1+v/c)/sqrt(1-v**2/c**2)*omega_0,3,c,3,10,v,1,2,omega_0,1,5,,,,,,,,,,,,,,,,,,,,,
|
38 |
-
I.34.27,10,37,E_n,(h/(2*pi))*omega,2,omega,1,5,h,1,5,,,,,,,,,,,,,,,,,,,,,,,,
|
39 |
-
I.37.4,100,38,Int,I1+I2+2*sqrt(I1*I2)*cos(delta),3,I1,1,5,I2,1,5,delta,1,5,,,,,,,,,,,,,,,,,,,,,
|
40 |
-
I.38.12,10,39,r,4*pi*epsilon*(h/(2*pi))**2/(m*q**2),4,m,1,5,q,1,5,h,1,5,epsilon,1,5,,,,,,,,,,,,,,,,,,
|
41 |
-
I.39.1,10,40,E_n,3/2*pr*V,2,pr,1,5,V,1,5,,,,,,,,,,,,,,,,,,,,,,,,
|
42 |
-
I.39.11,10,41,E_n,1/(gamma-1)*pr*V,3,gamma,2,5,pr,1,5,V,1,5,,,,,,,,,,,,,,,,,,,,,
|
43 |
-
I.39.22,10,42,pr,n*kb*T/V,4,n,1,5,T,1,5,V,1,5,kb,1,5,,,,,,,,,,,,,,,,,,
|
44 |
-
I.40.1,10,43,n,n_0*exp(-m*g*x/(kb*T)),6,n_0,1,5,m,1,5,x,1,5,T,1,5,g,1,5,kb,1,5,,,,,,,,,,,,
|
45 |
-
I.41.16,10,44,L_rad,h/(2*pi)*omega**3/(pi**2*c**2*(exp((h/(2*pi))*omega/(kb*T))-1)),5,omega,1,5,T,1,5,h,1,5,kb,1,5,c,1,5,,,,,,,,,,,,,,,
|
46 |
-
I.43.16,10,45,v,mu_drift*q*Volt/d,4,mu_drift,1,5,q,1,5,Volt,1,5,d,1,5,,,,,,,,,,,,,,,,,,
|
47 |
-
I.43.31,10,46,D,mob*kb*T,3,mob,1,5,T,1,5,kb,1,5,,,,,,,,,,,,,,,,,,,,,
|
48 |
-
I.43.43,10,47,kappa,1/(gamma-1)*kb*v/A,4,gamma,2,5,kb,1,5,A,1,5,v,1,5,,,,,,,,,,,,,,,,,,
|
49 |
-
I.44.4,10,48,E_n,n*kb*T*ln(V2/V1),5,n,1,5,kb,1,5,T,1,5,V1,1,5,V2,1,5,,,,,,,,,,,,,,,
|
50 |
-
I.47.23,10,49,c,sqrt(gamma*pr/rho),3,gamma,1,5,pr,1,5,rho,1,5,,,,,,,,,,,,,,,,,,,,,
|
51 |
-
I.48.2,100,50,E_n,m*c**2/sqrt(1-v**2/c**2),3,m,1,5,v,1,2,c,3,10,,,,,,,,,,,,,,,,,,,,,
|
52 |
-
I.50.26,10,51,x,x1*(cos(omega*t)+alpha*cos(omega*t)**2),4,x1,1,3,omega,1,3,t,1,3,alpha,1,3,,,,,,,,,,,,,,,,,,
|
53 |
-
II.2.42,10,52,Pwr,kappa*(T2-T1)*A/d,5,kappa,1,5,T1,1,5,T2,1,5,A,1,5,d,1,5,,,,,,,,,,,,,,,
|
54 |
-
II.3.24,10,53,flux,Pwr/(4*pi*r**2),2,Pwr,1,5,r,1,5,,,,,,,,,,,,,,,,,,,,,,,,
|
55 |
-
II.4.23,10,54,Volt,q/(4*pi*epsilon*r),3,q,1,5,epsilon,1,5,r,1,5,,,,,,,,,,,,,,,,,,,,,
|
56 |
-
II.6.11,10,55,Volt,1/(4*pi*epsilon)*p_d*cos(theta)/r**2,4,epsilon,1,3,p_d,1,3,theta,1,3,r,1,3,,,,,,,,,,,,,,,,,,
|
57 |
-
II.6.15a,1000,56,Ef,p_d/(4*pi*epsilon)*3*z/r**5*sqrt(x**2+y**2),6,epsilon,1,3,p_d,1,3,r,1,3,x,1,3,y,1,3,z,1,3,,,,,,,,,,,,
|
58 |
-
II.6.15b,10,57,Ef,p_d/(4*pi*epsilon)*3*cos(theta)*sin(theta)/r**3,4,epsilon,1,3,p_d,1,3,theta,1,3,r,1,3,,,,,,,,,,,,,,,,,,
|
59 |
-
II.8.7,10,58,E_n,3/5*q**2/(4*pi*epsilon*d),3,q,1,5,epsilon,1,5,d,1,5,,,,,,,,,,,,,,,,,,,,,
|
60 |
-
II.8.31,10,59,E_den,epsilon*Ef**2/2,2,epsilon,1,5,Ef,1,5,,,,,,,,,,,,,,,,,,,,,,,,
|
61 |
-
II.10.9,10,60,Ef,sigma_den/epsilon*1/(1+chi),3,sigma_den,1,5,epsilon,1,5,chi,1,5,,,,,,,,,,,,,,,,,,,,,
|
62 |
-
II.11.3,10,61,x,q*Ef/(m*(omega_0**2-omega**2)),5,q,1,3,Ef,1,3,m,1,3,omega_0,3,5,omega,1,2,,,,,,,,,,,,,,,
|
63 |
-
II.11.17,10,62,n,n_0*(1+p_d*Ef*cos(theta)/(kb*T)),6,n_0,1,3,kb,1,3,T,1,3,theta,1,3,p_d,1,3,Ef,1,3,,,,,,,,,,,,
|
64 |
-
II.11.20,10,63,Pol,n_rho*p_d**2*Ef/(3*kb*T),5,n_rho,1,5,p_d,1,5,Ef,1,5,kb,1,5,T,1,5,,,,,,,,,,,,,,,
|
65 |
-
II.11.27,100,64,Pol,n*alpha/(1-(n*alpha/3))*epsilon*Ef,4,n,0,1,alpha,0,1,epsilon,1,2,Ef,1,2,,,,,,,,,,,,,,,,,,
|
66 |
-
II.11.28,100,65,theta,1+n*alpha/(1-(n*alpha/3)),2,n,0,1,alpha,0,1,,,,,,,,,,,,,,,,,,,,,,,,
|
67 |
-
II.13.17,10,66,B,1/(4*pi*epsilon*c**2)*2*I/r,4,epsilon,1,5,c,1,5,I,1,5,r,1,5,,,,,,,,,,,,,,,,,,
|
68 |
-
II.13.23,100,67,rho_c,rho_c_0/sqrt(1-v**2/c**2),3,rho_c_0,1,5,v,1,2,c,3,10,,,,,,,,,,,,,,,,,,,,,
|
69 |
-
II.13.34,10,68,j,rho_c_0*v/sqrt(1-v**2/c**2),3,rho_c_0,1,5,v,1,2,c,3,10,,,,,,,,,,,,,,,,,,,,,
|
70 |
-
II.15.4,10,69,E_n,-mom*B*cos(theta),3,mom,1,5,B,1,5,theta,1,5,,,,,,,,,,,,,,,,,,,,,
|
71 |
-
II.15.5,10,70,E_n,-p_d*Ef*cos(theta),3,p_d,1,5,Ef,1,5,theta,1,5,,,,,,,,,,,,,,,,,,,,,
|
72 |
-
II.21.32,10,71,Volt,q/(4*pi*epsilon*r*(1-v/c)),5,q,1,5,epsilon,1,5,r,1,5,v,1,2,c,3,10,,,,,,,,,,,,,,,
|
73 |
-
II.24.17,10,72,k,sqrt(omega**2/c**2-pi**2/d**2),3,omega,4,6,c,1,2,d,2,4,,,,,,,,,,,,,,,,,,,,,
|
74 |
-
II.27.16,10,73,flux,epsilon*c*Ef**2,3,epsilon,1,5,c,1,5,Ef,1,5,,,,,,,,,,,,,,,,,,,,,
|
75 |
-
II.27.18,10,74,E_den,epsilon*Ef**2,2,epsilon,1,5,Ef,1,5,,,,,,,,,,,,,,,,,,,,,,,,
|
76 |
-
II.34.2a,10,75,I,q*v/(2*pi*r),3,q,1,5,v,1,5,r,1,5,,,,,,,,,,,,,,,,,,,,,
|
77 |
-
II.34.2,10,76,mom,q*v*r/2,3,q,1,5,v,1,5,r,1,5,,,,,,,,,,,,,,,,,,,,,
|
78 |
-
II.34.11,10,77,omega,g_*q*B/(2*m),4,g_,1,5,q,1,5,B,1,5,m,1,5,,,,,,,,,,,,,,,,,,
|
79 |
-
II.34.29a,10,78,mom,q*h/(4*pi*m),3,q,1,5,h,1,5,m,1,5,,,,,,,,,,,,,,,,,,,,,
|
80 |
-
II.34.29b,10,79,E_n,g_*mom*B*Jz/(h/(2*pi)),5,g_,1,5,h,1,5,Jz,1,5,mom,1,5,B,1,5,,,,,,,,,,,,,,,
|
81 |
-
II.35.18,10,80,n,n_0/(exp(mom*B/(kb*T))+exp(-mom*B/(kb*T))),5,n_0,1,3,kb,1,3,T,1,3,mom,1,3,B,1,3,,,,,,,,,,,,,,,
|
82 |
-
II.35.21,10,81,M,n_rho*mom*tanh(mom*B/(kb*T)),5,n_rho,1,5,mom,1,5,B,1,5,kb,1,5,T,1,5,,,,,,,,,,,,,,,
|
83 |
-
II.36.38,10,82,f,mom*H/(kb*T)+(mom*alpha)/(epsilon*c**2*kb*T)*M,8,mom,1,3,H,1,3,kb,1,3,T,1,3,alpha,1,3,epsilon,1,3,c,1,3,M,1,3,,,,,,
|
84 |
-
II.37.1,10,83,E_n,mom*(1+chi)*B,3,mom,1,5,B,1,5,chi,1,5,,,,,,,,,,,,,,,,,,,,,
|
85 |
-
II.38.3,10,84,F,Y*A*x/d,4,Y,1,5,A,1,5,d,1,5,x,1,5,,,,,,,,,,,,,,,,,,
|
86 |
-
II.38.14,10,85,mu_S,Y/(2*(1+sigma)),2,Y,1,5,sigma,1,5,,,,,,,,,,,,,,,,,,,,,,,,
|
87 |
-
III.4.32,10,86,n,1/(exp((h/(2*pi))*omega/(kb*T))-1),4,h,1,5,omega,1,5,kb,1,5,T,1,5,,,,,,,,,,,,,,,,,,
|
88 |
-
III.4.33,10,87,E_n,(h/(2*pi))*omega/(exp((h/(2*pi))*omega/(kb*T))-1),4,h,1,5,omega,1,5,kb,1,5,T,1,5,,,,,,,,,,,,,,,,,,
|
89 |
-
III.7.38,10,88,omega,2*mom*B/(h/(2*pi)),3,mom,1,5,B,1,5,h,1,5,,,,,,,,,,,,,,,,,,,,,
|
90 |
-
III.8.54,10,89,prob,sin(E_n*t/(h/(2*pi)))**2,3,E_n,1,2,t,1,2,h,1,4,,,,,,,,,,,,,,,,,,,,,
|
91 |
-
III.9.52,1000,90,prob,(p_d*Ef*t/(h/(2*pi)))*sin((omega-omega_0)*t/2)**2/((omega-omega_0)*t/2)**2,6,p_d,1,3,Ef,1,3,t,1,3,h,1,3,omega,1,5,omega_0,1,5,,,,,,,,,,,,
|
92 |
-
III.10.19,100,91,E_n,mom*sqrt(Bx**2+By**2+Bz**2),4,mom,1,5,Bx,1,5,By,1,5,Bz,1,5,,,,,,,,,,,,,,,,,,
|
93 |
-
III.12.43,10,92,L,n*(h/(2*pi)),2,n,1,5,h,1,5,,,,,,,,,,,,,,,,,,,,,,,,
|
94 |
-
III.13.18,10,93,v,2*E_n*d**2*k/(h/(2*pi)),4,E_n,1,5,d,1,5,k,1,5,h,1,5,,,,,,,,,,,,,,,,,,
|
95 |
-
III.14.14,10,94,I,I_0*(exp(q*Volt/(kb*T))-1),5,I_0,1,5,q,1,2,Volt,1,2,kb,1,2,T,1,2,,,,,,,,,,,,,,,
|
96 |
-
III.15.12,10,95,E_n,2*U*(1-cos(k*d)),3,U,1,5,k,1,5,d,1,5,,,,,,,,,,,,,,,,,,,,,
|
97 |
-
III.15.14,10,96,m,(h/(2*pi))**2/(2*E_n*d**2),3,h,1,5,E_n,1,5,d,1,5,,,,,,,,,,,,,,,,,,,,,
|
98 |
-
III.15.27,10,97,k,2*pi*alpha/(n*d),3,alpha,1,5,n,1,5,d,1,5,,,,,,,,,,,,,,,,,,,,,
|
99 |
-
III.17.37,10,98,f,beta*(1+alpha*cos(theta)),3,beta,1,5,alpha,1,5,theta,1,5,,,,,,,,,,,,,,,,,,,,,
|
100 |
-
III.19.51,10,99,E_n,-m*q**4/(2*(4*pi*epsilon)**2*(h/(2*pi))**2)*(1/n**2),5,m,1,5,q,1,5,h,1,5,n,1,5,epsilon,1,5,,,,,,,,,,,,,,,
|
101 |
-
III.21.20,10,100,j,-rho_c_0*q*A_vec/m,4,rho_c_0,1,5,q,1,5,A_vec,1,5,m,1,5,,,,,,,,,,,,,,,,,,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
docs/backend.md
CHANGED
@@ -2,27 +2,73 @@
|
|
2 |
|
3 |
If you have explored the [options](options.md) and [PySRRegressor reference](api.md), and still haven't figured out how to specify a constraint or objective required for your problem, you might consider editing the backend.
|
4 |
The backend of PySR is written as a pure Julia package under the name [SymbolicRegression.jl](https://github.com/MilesCranmer/SymbolicRegression.jl).
|
5 |
-
This package is accessed with [`
|
6 |
|
7 |
PySR gives you access to everything in SymbolicRegression.jl, but there are some specific use-cases which require modifications to the backend itself.
|
8 |
Generally you can do this as follows:
|
9 |
|
10 |
-
1.
|
11 |
-
|
|
|
|
|
|
|
12 |
git clone https://github.com/MilesCranmer/SymbolicRegression.jl
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
```
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
- Note that it will automatically update your project by default; to turn this off, set `update=False`.
|
26 |
|
27 |
If you get comfortable enough with the backend, you might consider using the Julia package directly: the API is given on the [SymbolicRegression.jl documentation](https://astroautomata.com/SymbolicRegression.jl/dev/).
|
28 |
|
|
|
2 |
|
3 |
If you have explored the [options](options.md) and [PySRRegressor reference](api.md), and still haven't figured out how to specify a constraint or objective required for your problem, you might consider editing the backend.
|
4 |
The backend of PySR is written as a pure Julia package under the name [SymbolicRegression.jl](https://github.com/MilesCranmer/SymbolicRegression.jl).
|
5 |
+
This package is accessed with [`juliacall`](https://github.com/JuliaPy/PythonCall.jl), which allows us to transfer objects back and forth between the Python and Julia runtimes.
|
6 |
|
7 |
PySR gives you access to everything in SymbolicRegression.jl, but there are some specific use-cases which require modifications to the backend itself.
|
8 |
Generally you can do this as follows:
|
9 |
|
10 |
+
## 1. Check out the source code
|
11 |
+
|
12 |
+
Clone a copy of the backend as well as PySR:
|
13 |
+
|
14 |
+
```bash
|
15 |
git clone https://github.com/MilesCranmer/SymbolicRegression.jl
|
16 |
+
git clone https://github.com/MilesCranmer/PySR
|
17 |
+
```
|
18 |
+
|
19 |
+
You may wish to check out the specific versions, which you can do with:
|
20 |
+
|
21 |
+
```bash
|
22 |
+
cd PySR
|
23 |
+
git checkout <version>
|
24 |
+
|
25 |
+
# You can see the current backend version in `pysr/juliapkg.json`
|
26 |
+
cd ../SymbolicRegression.jl
|
27 |
+
git checkout <backend_version>
|
28 |
+
```
|
29 |
+
|
30 |
+
## 2. Edit the source to your requirements
|
31 |
+
|
32 |
+
The main search code can be found in `src/SymbolicRegression.jl`.
|
33 |
+
|
34 |
+
Here are some tips:
|
35 |
+
|
36 |
+
- The documentation for the backend is given [here](https://astroautomata.com/SymbolicRegression.jl/dev/).
|
37 |
+
- Throughout the package, you will often see template functions which typically use a symbol `T` (such as in the string `where {T<:Real}`). Here, `T` is simply the datatype of the input data and stored constants, such as `Float32` or `Float64`. Writing functions in this way lets us write functions generic to types, while still having access to the specific type specified at compilation time.
|
38 |
+
- Expressions are stored as binary trees, using the `Node{T}` type, described [here](https://astroautomata.com/SymbolicRegression.jl/dev/types/#SymbolicRegression.CoreModule.EquationModule.Node).
|
39 |
+
- For reference, the main loop itself is found in the `equation_search` function inside [`src/SymbolicRegression.jl`](https://github.com/MilesCranmer/SymbolicRegression.jl/blob/master/src/SymbolicRegression.jl).
|
40 |
+
- Parts of the code which are typically edited by users include:
|
41 |
+
- [`src/CheckConstraints.jl`](https://github.com/MilesCranmer/SymbolicRegression.jl/blob/master/src/CheckConstraints.jl), particularly the function `check_constraints`. This function checks whether a given expression satisfies constraints, such as having a complexity lower than `maxsize`, and whether it contains any forbidden nestings of functions.
|
42 |
+
- Note that all expressions, *even intermediate expressions*, must comply with constraints. Therefore, make sure that evolution can still reach your desired expression (with one mutation at a time), before setting a hard constraint. In other cases you might want to instead put in the loss function.
|
43 |
+
- [`src/Options.jl`](https://github.com/MilesCranmer/SymbolicRegression.jl/blob/master/src/Options.jl), as well as the struct definition in [`src/OptionsStruct.jl`](https://github.com/MilesCranmer/SymbolicRegression.jl/blob/master/src/OptionsStruct.jl). This file specifies all the options used in the search: an instance of `Options` is typically available throughout every function in `SymbolicRegression.jl`. If you add new functionality to the backend, and wish to make it parameterizable (including from PySR), you should specify it in the options.
|
44 |
+
|
45 |
+
## 3. Let PySR use the modified backend
|
46 |
+
|
47 |
+
Once you have made your changes, you should edit the `pysr/juliapkg.json` file
|
48 |
+
in the PySR repository to point to this local copy.
|
49 |
+
Do this by removing the `"version"` key and adding a `"dev"` and `"path"` key:
|
50 |
+
|
51 |
+
```json
|
52 |
+
...
|
53 |
+
"packages": {
|
54 |
+
"SymbolicRegression": {
|
55 |
+
"uuid": "8254be44-1295-4e6a-a16d-46603ac705cb",
|
56 |
+
"dev": true,
|
57 |
+
"path": "/path/to/SymbolicRegression.jl"
|
58 |
+
},
|
59 |
+
...
|
60 |
```
|
61 |
+
|
62 |
+
You can then install PySR with this modified backend by running:
|
63 |
+
|
64 |
+
```bash
|
65 |
+
cd PySR
|
66 |
+
pip install .
|
67 |
+
```
|
68 |
+
|
69 |
+
For more information on `juliapkg.json`, see [`pyjuliapkg`](https://github.com/JuliaPy/pyjuliapkg).
|
70 |
+
|
71 |
+
## Additional notes
|
|
|
72 |
|
73 |
If you get comfortable enough with the backend, you might consider using the Julia package directly: the API is given on the [SymbolicRegression.jl documentation](https://astroautomata.com/SymbolicRegression.jl/dev/).
|
74 |
|
docs/examples.md
CHANGED
@@ -144,7 +144,7 @@ but there are still some additional steps you can take to reduce the effect of n
|
|
144 |
|
145 |
One thing you could do, which we won't detail here, is to create a custom log-likelihood
|
146 |
given some assumed noise model. By passing weights to the fit function, and
|
147 |
-
defining a custom loss function such as `
|
148 |
you can define any sort of log-likelihood you wish. (However, note that it must be bounded at zero)
|
149 |
|
150 |
However, the simplest thing to do is preprocessing, just like for feature selection. To do this,
|
@@ -189,12 +189,10 @@ where $p_i$ is the $i$th prime number, and $x$ is the input feature.
|
|
189 |
Let's see if we can discover this using
|
190 |
the [Primes.jl](https://github.com/JuliaMath/Primes.jl) package.
|
191 |
|
192 |
-
First, let's
|
193 |
-
(here, with 8 threads and `-O3`):
|
194 |
|
195 |
```python
|
196 |
-
import
|
197 |
-
jl = pysr.julia_helpers.init_julia(julia_kwargs={"threads": 8, "optimize": 3})
|
198 |
```
|
199 |
|
200 |
`jl` stores the Julia runtime.
|
@@ -203,7 +201,7 @@ Now, let's run some Julia code to add the Primes.jl
|
|
203 |
package to the PySR environment:
|
204 |
|
205 |
```python
|
206 |
-
jl.
|
207 |
import Pkg
|
208 |
Pkg.add("Primes")
|
209 |
""")
|
@@ -213,13 +211,13 @@ This imports the Julia package manager, and uses it to install
|
|
213 |
`Primes.jl`. Now let's import `Primes.jl`:
|
214 |
|
215 |
```python
|
216 |
-
jl.
|
217 |
```
|
218 |
|
219 |
Now, we define a custom operator:
|
220 |
|
221 |
```python
|
222 |
-
jl.
|
223 |
function p(i::T) where T
|
224 |
if (0.5 < i < 1000)
|
225 |
return T(Primes.prime(round(Int, i)))
|
@@ -237,7 +235,7 @@ If in-bounds, it rounds it to the nearest integer, compures the corresponding pr
|
|
237 |
converts it to the same type as input.
|
238 |
|
239 |
Next, let's generate a list of primes for our test dataset.
|
240 |
-
Since we are using
|
241 |
|
242 |
```python
|
243 |
primes = {i: jl.p(i*1.0) for i in range(1, 999)}
|
@@ -382,7 +380,7 @@ end
|
|
382 |
model = PySRRegressor(
|
383 |
niterations=100,
|
384 |
binary_operators=["*", "+", "-"],
|
385 |
-
|
386 |
)
|
387 |
```
|
388 |
|
@@ -464,7 +462,7 @@ let's also create a custom loss function
|
|
464 |
that looks at the error in log-space:
|
465 |
|
466 |
```python
|
467 |
-
|
468 |
scatter_loss = abs(log((abs(prediction)+1e-20) / (abs(target)+1e-20)))
|
469 |
sign_loss = 10 * (sign(prediction) - sign(target))^2
|
470 |
return scatter_loss + sign_loss
|
@@ -478,7 +476,7 @@ Now let's define our model:
|
|
478 |
model = PySRRegressor(
|
479 |
binary_operators=["+", "-", "*", "/"],
|
480 |
unary_operators=["square"],
|
481 |
-
|
482 |
complexity_of_constants=2,
|
483 |
maxsize=25,
|
484 |
niterations=100,
|
|
|
144 |
|
145 |
One thing you could do, which we won't detail here, is to create a custom log-likelihood
|
146 |
given some assumed noise model. By passing weights to the fit function, and
|
147 |
+
defining a custom loss function such as `elementwise_loss="myloss(x, y, w) = w * (x - y)^2"`,
|
148 |
you can define any sort of log-likelihood you wish. (However, note that it must be bounded at zero)
|
149 |
|
150 |
However, the simplest thing to do is preprocessing, just like for feature selection. To do this,
|
|
|
189 |
Let's see if we can discover this using
|
190 |
the [Primes.jl](https://github.com/JuliaMath/Primes.jl) package.
|
191 |
|
192 |
+
First, let's get the Julia backend:
|
|
|
193 |
|
194 |
```python
|
195 |
+
from pysr import jl
|
|
|
196 |
```
|
197 |
|
198 |
`jl` stores the Julia runtime.
|
|
|
201 |
package to the PySR environment:
|
202 |
|
203 |
```python
|
204 |
+
jl.seval("""
|
205 |
import Pkg
|
206 |
Pkg.add("Primes")
|
207 |
""")
|
|
|
211 |
`Primes.jl`. Now let's import `Primes.jl`:
|
212 |
|
213 |
```python
|
214 |
+
jl.seval("import Primes")
|
215 |
```
|
216 |
|
217 |
Now, we define a custom operator:
|
218 |
|
219 |
```python
|
220 |
+
jl.seval("""
|
221 |
function p(i::T) where T
|
222 |
if (0.5 < i < 1000)
|
223 |
return T(Primes.prime(round(Int, i)))
|
|
|
235 |
converts it to the same type as input.
|
236 |
|
237 |
Next, let's generate a list of primes for our test dataset.
|
238 |
+
Since we are using juliacall, we can just call `p` directly to do this:
|
239 |
|
240 |
```python
|
241 |
primes = {i: jl.p(i*1.0) for i in range(1, 999)}
|
|
|
380 |
model = PySRRegressor(
|
381 |
niterations=100,
|
382 |
binary_operators=["*", "+", "-"],
|
383 |
+
loss_function=objective,
|
384 |
)
|
385 |
```
|
386 |
|
|
|
462 |
that looks at the error in log-space:
|
463 |
|
464 |
```python
|
465 |
+
elementwise_loss = """function loss_fnc(prediction, target)
|
466 |
scatter_loss = abs(log((abs(prediction)+1e-20) / (abs(target)+1e-20)))
|
467 |
sign_loss = 10 * (sign(prediction) - sign(target))^2
|
468 |
return scatter_loss + sign_loss
|
|
|
476 |
model = PySRRegressor(
|
477 |
binary_operators=["+", "-", "*", "/"],
|
478 |
unary_operators=["square"],
|
479 |
+
elementwise_loss=elementwise_loss,
|
480 |
complexity_of_constants=2,
|
481 |
maxsize=25,
|
482 |
niterations=100,
|
docs/options.md
CHANGED
@@ -78,11 +78,11 @@ with the equations.
|
|
78 |
Each cycle considers every 10-equation subsample (re-sampled for each individual 10,
|
79 |
unless `fast_cycle` is set in which case the subsamples are separate groups of equations)
|
80 |
a single time, producing one mutated equation for each.
|
81 |
-
The parameter `
|
82 |
occurs before the equations are compared to the hall of fame,
|
83 |
and new equations are migrated from the hall of fame, or from other populations.
|
84 |
It also controls how slowly annealing occurs. You may find that increasing
|
85 |
-
`
|
86 |
worker needs to reduce and distribute new equations less often, and also increases
|
87 |
diversity. But at the same
|
88 |
time, a smaller number it might be that migrating equations from the hall of fame helps
|
@@ -243,7 +243,7 @@ train the parameters within JAX (and is differentiable).
|
|
243 |
|
244 |
The default loss is mean-square error, and weighted mean-square error.
|
245 |
One can pass an arbitrary Julia string to define a custom loss, using,
|
246 |
-
e.g., `
|
247 |
see the
|
248 |
[Losses](https://milescranmer.github.io/SymbolicRegression.jl/dev/losses/)
|
249 |
page for SymbolicRegression.jl.
|
@@ -253,26 +253,26 @@ Here are some additional examples:
|
|
253 |
abs(x-y) loss
|
254 |
|
255 |
```python
|
256 |
-
PySRRegressor(...,
|
257 |
```
|
258 |
|
259 |
Note that the function name doesn't matter:
|
260 |
|
261 |
```python
|
262 |
-
PySRRegressor(...,
|
263 |
```
|
264 |
|
265 |
With weights:
|
266 |
|
267 |
```python
|
268 |
-
model = PySRRegressor(...,
|
269 |
model.fit(..., weights=weights)
|
270 |
```
|
271 |
|
272 |
Weights can be used in arbitrary ways:
|
273 |
|
274 |
```python
|
275 |
-
model = PySRRegressor(..., weights=weights,
|
276 |
model.fit(..., weights=weights)
|
277 |
```
|
278 |
|
@@ -280,13 +280,13 @@ Built-in loss (faster) (see [losses](https://astroautomata.com/SymbolicRegressio
|
|
280 |
This one computes the L3 norm:
|
281 |
|
282 |
```python
|
283 |
-
PySRRegressor(...,
|
284 |
```
|
285 |
|
286 |
Can also uses these losses for weighted (weighted-average):
|
287 |
|
288 |
```python
|
289 |
-
model = PySRRegressor(..., weights=weights,
|
290 |
model.fit(..., weights=weights)
|
291 |
```
|
292 |
|
|
|
78 |
Each cycle considers every 10-equation subsample (re-sampled for each individual 10,
|
79 |
unless `fast_cycle` is set in which case the subsamples are separate groups of equations)
|
80 |
a single time, producing one mutated equation for each.
|
81 |
+
The parameter `ncycles_per_iteration` defines how many times this
|
82 |
occurs before the equations are compared to the hall of fame,
|
83 |
and new equations are migrated from the hall of fame, or from other populations.
|
84 |
It also controls how slowly annealing occurs. You may find that increasing
|
85 |
+
`ncycles_per_iteration` results in a higher cycles-per-second, as the head
|
86 |
worker needs to reduce and distribute new equations less often, and also increases
|
87 |
diversity. But at the same
|
88 |
time, a smaller number it might be that migrating equations from the hall of fame helps
|
|
|
243 |
|
244 |
The default loss is mean-square error, and weighted mean-square error.
|
245 |
One can pass an arbitrary Julia string to define a custom loss, using,
|
246 |
+
e.g., `elementwise_loss="myloss(x, y) = abs(x - y)^1.5"`. For more details,
|
247 |
see the
|
248 |
[Losses](https://milescranmer.github.io/SymbolicRegression.jl/dev/losses/)
|
249 |
page for SymbolicRegression.jl.
|
|
|
253 |
abs(x-y) loss
|
254 |
|
255 |
```python
|
256 |
+
PySRRegressor(..., elementwise_loss="f(x, y) = abs(x - y)^1.5")
|
257 |
```
|
258 |
|
259 |
Note that the function name doesn't matter:
|
260 |
|
261 |
```python
|
262 |
+
PySRRegressor(..., elementwise_loss="loss(x, y) = abs(x * y)")
|
263 |
```
|
264 |
|
265 |
With weights:
|
266 |
|
267 |
```python
|
268 |
+
model = PySRRegressor(..., elementwise_loss="myloss(x, y, w) = w * abs(x - y)")
|
269 |
model.fit(..., weights=weights)
|
270 |
```
|
271 |
|
272 |
Weights can be used in arbitrary ways:
|
273 |
|
274 |
```python
|
275 |
+
model = PySRRegressor(..., weights=weights, elementwise_loss="myloss(x, y, w) = abs(x - y)^2/w^2")
|
276 |
model.fit(..., weights=weights)
|
277 |
```
|
278 |
|
|
|
280 |
This one computes the L3 norm:
|
281 |
|
282 |
```python
|
283 |
+
PySRRegressor(..., elementwise_loss="LPDistLoss{3}()")
|
284 |
```
|
285 |
|
286 |
Can also uses these losses for weighted (weighted-average):
|
287 |
|
288 |
```python
|
289 |
+
model = PySRRegressor(..., weights=weights, elementwise_loss="LPDistLoss{3}()")
|
290 |
model.fit(..., weights=weights)
|
291 |
```
|
292 |
|
docs/tuning.md
CHANGED
@@ -14,12 +14,12 @@ I run from IPython (Jupyter Notebooks don't work as well[^1]) on the head node o
|
|
14 |
2. Use only the operators I think it needs and no more.
|
15 |
3. Increase `populations` to `3*num_cores`.
|
16 |
4. If my dataset is more than 1000 points, I either subsample it (low-dimensional and not much noise) or set `batching=True` (high-dimensional or very noisy, so it needs to evaluate on all the data).
|
17 |
-
5. While on a laptop or single node machine, you might leave the default `
|
18 |
6. Set `constraints` and `nested_constraints` as strict as possible. These can help quite a bit with exploration. Typically, if I am using `pow`, I would set `constraints={"pow": (9, 1)}`, so that power laws can only have a variable or constant as their exponent. If I am using `sin` and `cos`, I also like to set `nested_constraints={"sin": {"sin": 0, "cos": 0}, "cos": {"sin": 0, "cos": 0}}`, so that sin and cos can't be nested, which seems to happen frequently. (Although in practice I would just use `sin`, since the search could always add a phase offset!)
|
19 |
7. Set `maxsize` a bit larger than the final size you want. e.g., if you want a final equation of size `30`, you might set this to `35`, so that it has a bit of room to explore.
|
20 |
8. I typically don't use `maxdepth`, but if I do, I set it strictly, while also leaving a bit of room for exploration. e.g., if you want a final equation limited to a depth of `5`, you might set this to `6` or `7`, so that it has a bit of room to explore.
|
21 |
9. Set `parsimony` equal to about the minimum loss you would expect, divided by 5-10. e.g., if you expect the final equation to have a loss of `0.001`, you might set `parsimony=0.0001`.
|
22 |
-
10. Set `weight_optimize` to some larger value, maybe `0.001`. This is very important if `
|
23 |
11. Set `turbo` to `True`. This may or not work, if there's an error just turn it off (some operators are not SIMD-capable). If it does work, it should give you a nice 20% speedup.
|
24 |
12. For final runs, after I have tuned everything, I typically set `niterations` to some very large value, and just let it run for a week until my job finishes (genetic algorithms tend not to converge, they can look like they settle down, but then find a new family of expression, and explore a new space). If I am satisfied with the current equations (which are visible either in the terminal or in the saved csv file), I quit the job early.
|
25 |
|
|
|
14 |
2. Use only the operators I think it needs and no more.
|
15 |
3. Increase `populations` to `3*num_cores`.
|
16 |
4. If my dataset is more than 1000 points, I either subsample it (low-dimensional and not much noise) or set `batching=True` (high-dimensional or very noisy, so it needs to evaluate on all the data).
|
17 |
+
5. While on a laptop or single node machine, you might leave the default `ncycles_per_iteration`, on a cluster with ~100 cores I like to set `ncycles_per_iteration` to maybe `5000` or so, until the head node occupation is under `10%`. (A larger value means the workers talk less frequently to eachother, which is useful when you have many workers!)
|
18 |
6. Set `constraints` and `nested_constraints` as strict as possible. These can help quite a bit with exploration. Typically, if I am using `pow`, I would set `constraints={"pow": (9, 1)}`, so that power laws can only have a variable or constant as their exponent. If I am using `sin` and `cos`, I also like to set `nested_constraints={"sin": {"sin": 0, "cos": 0}, "cos": {"sin": 0, "cos": 0}}`, so that sin and cos can't be nested, which seems to happen frequently. (Although in practice I would just use `sin`, since the search could always add a phase offset!)
|
19 |
7. Set `maxsize` a bit larger than the final size you want. e.g., if you want a final equation of size `30`, you might set this to `35`, so that it has a bit of room to explore.
|
20 |
8. I typically don't use `maxdepth`, but if I do, I set it strictly, while also leaving a bit of room for exploration. e.g., if you want a final equation limited to a depth of `5`, you might set this to `6` or `7`, so that it has a bit of room to explore.
|
21 |
9. Set `parsimony` equal to about the minimum loss you would expect, divided by 5-10. e.g., if you expect the final equation to have a loss of `0.001`, you might set `parsimony=0.0001`.
|
22 |
+
10. Set `weight_optimize` to some larger value, maybe `0.001`. This is very important if `ncycles_per_iteration` is large, so that optimization happens more frequently.
|
23 |
11. Set `turbo` to `True`. This may or not work, if there's an error just turn it off (some operators are not SIMD-capable). If it does work, it should give you a nice 20% speedup.
|
24 |
12. For final runs, after I have tuned everything, I typically set `niterations` to some very large value, and just let it run for a week until my job finishes (genetic algorithms tend not to converge, they can look like they settle down, but then find a new family of expression, and explore a new space). If I am satisfied with the current equations (which are visible either in the terminal or in the saved csv file), I quit the job early.
|
25 |
|
environment.yml
CHANGED
@@ -2,12 +2,11 @@ name: test
|
|
2 |
channels:
|
3 |
- conda-forge
|
4 |
dependencies:
|
5 |
-
-
|
6 |
-
-
|
7 |
-
-
|
8 |
-
-
|
9 |
-
-
|
10 |
-
-
|
11 |
-
-
|
12 |
-
-
|
13 |
-
- click
|
|
|
2 |
channels:
|
3 |
- conda-forge
|
4 |
dependencies:
|
5 |
+
- python>=3.7
|
6 |
+
- sympy>=1.0.0,<2.0.0
|
7 |
+
- pandas>=0.21.0,<3.0.0
|
8 |
+
- numpy>=1.13.0,<2.0.0
|
9 |
+
- scikit-learn>=1.0.0,<2.0.0
|
10 |
+
- pyjuliacall>=0.9.15,<0.10.0
|
11 |
+
- click>=7.0.0,<9.0.0
|
12 |
+
- typing_extensions>=4.0.0,<5.0.0
|
|
example.py
CHANGED
@@ -18,7 +18,7 @@ model = PySRRegressor(
|
|
18 |
],
|
19 |
extra_sympy_mappings={"inv": lambda x: 1 / x},
|
20 |
# ^ Define operator for SymPy as well
|
21 |
-
|
22 |
# ^ Custom loss function (julia syntax)
|
23 |
)
|
24 |
|
|
|
18 |
],
|
19 |
extra_sympy_mappings={"inv": lambda x: 1 / x},
|
20 |
# ^ Define operator for SymPy as well
|
21 |
+
elementwise_loss="loss(x, y) = (x - y)^2",
|
22 |
# ^ Custom loss function (julia syntax)
|
23 |
)
|
24 |
|
examples/pysr_demo.ipynb
CHANGED
@@ -15,68 +15,9 @@
|
|
15 |
"id": "tQ1r1bbb0yBv"
|
16 |
},
|
17 |
"source": [
|
18 |
-
"\n",
|
19 |
"## Instructions\n",
|
20 |
"1. Work on a copy of this notebook: _File_ > _Save a copy in Drive_ (you will need a Google account).\n",
|
21 |
-
"2. (Optional) If you would like to do the deep learning component of this tutorial, turn on the GPU with Edit->Notebook settings->Hardware accelerator->GPU\n"
|
22 |
-
"3. Execute the following cell (click on it and press Ctrl+Enter) to install Julia. This may take a minute or so.\n",
|
23 |
-
"4. Continue to the next section.\n",
|
24 |
-
"\n",
|
25 |
-
"_Notes_:\n",
|
26 |
-
"* If your Colab Runtime gets reset (e.g., due to inactivity), repeat steps 3, 4.\n",
|
27 |
-
"* After installation, if you want to change the Julia version or activate/deactivate the GPU, you will need to reset the Runtime: _Runtime_ > _Delete and disconnect runtime_ and repeat steps 2-4."
|
28 |
-
]
|
29 |
-
},
|
30 |
-
{
|
31 |
-
"cell_type": "markdown",
|
32 |
-
"metadata": {
|
33 |
-
"id": "COndi88gbDgO"
|
34 |
-
},
|
35 |
-
"source": [
|
36 |
-
"**Run the following code to install Julia**"
|
37 |
-
]
|
38 |
-
},
|
39 |
-
{
|
40 |
-
"cell_type": "code",
|
41 |
-
"execution_count": null,
|
42 |
-
"metadata": {
|
43 |
-
"colab": {
|
44 |
-
"base_uri": "https://localhost:8080/"
|
45 |
-
},
|
46 |
-
"id": "GIeFXS0F0zww",
|
47 |
-
"outputId": "5399ed75-f77f-47c5-e53b-4b2f231f2839"
|
48 |
-
},
|
49 |
-
"outputs": [],
|
50 |
-
"source": [
|
51 |
-
"!curl -fsSL https://install.julialang.org | sh -s -- -y --default-channel 1.10"
|
52 |
-
]
|
53 |
-
},
|
54 |
-
{
|
55 |
-
"cell_type": "code",
|
56 |
-
"execution_count": null,
|
57 |
-
"metadata": {
|
58 |
-
"colab": {
|
59 |
-
"base_uri": "https://localhost:8080/"
|
60 |
-
},
|
61 |
-
"id": "Iu9X-Y-YNmwM",
|
62 |
-
"outputId": "ee14af65-043a-4ad6-efa0-3cdcc48a4eb8"
|
63 |
-
},
|
64 |
-
"outputs": [],
|
65 |
-
"source": [
|
66 |
-
"# Make julia available on PATH:\n",
|
67 |
-
"!ln -s $HOME/.juliaup/bin/julia /usr/local/bin/julia\n",
|
68 |
-
"\n",
|
69 |
-
"# Test it works:\n",
|
70 |
-
"!julia --version"
|
71 |
-
]
|
72 |
-
},
|
73 |
-
{
|
74 |
-
"cell_type": "markdown",
|
75 |
-
"metadata": {
|
76 |
-
"id": "ORv1c6xvbDgV"
|
77 |
-
},
|
78 |
-
"source": [
|
79 |
-
"Install PySR"
|
80 |
]
|
81 |
},
|
82 |
{
|
@@ -91,36 +32,23 @@
|
|
91 |
},
|
92 |
"outputs": [],
|
93 |
"source": [
|
94 |
-
"!pip install
|
95 |
]
|
96 |
},
|
97 |
{
|
98 |
"cell_type": "markdown",
|
99 |
-
"metadata": {
|
100 |
-
"id": "etTMEV0wDqld"
|
101 |
-
},
|
102 |
"source": [
|
103 |
-
"
|
104 |
]
|
105 |
},
|
106 |
{
|
107 |
"cell_type": "code",
|
108 |
"execution_count": null,
|
109 |
-
"metadata": {
|
110 |
-
"id": "j666aOI8xWF_"
|
111 |
-
},
|
112 |
"outputs": [],
|
113 |
"source": [
|
114 |
-
"
|
115 |
-
" from pysr.julia_helpers import init_julia\n",
|
116 |
-
" from julia.tools import redirect_output_streams\n",
|
117 |
-
"\n",
|
118 |
-
" julia_kwargs = dict(optimize=3, threads=\"auto\", compiled_modules=False)\n",
|
119 |
-
" init_julia(julia_kwargs=julia_kwargs)\n",
|
120 |
-
" redirect_output_streams()\n",
|
121 |
-
"\n",
|
122 |
-
"\n",
|
123 |
-
"init_colab_printing()"
|
124 |
]
|
125 |
},
|
126 |
{
|
@@ -129,7 +57,7 @@
|
|
129 |
"id": "qeCPKd9wldEK"
|
130 |
},
|
131 |
"source": [
|
132 |
-
"Now, let's import
|
133 |
]
|
134 |
},
|
135 |
{
|
@@ -233,7 +161,7 @@
|
|
233 |
" niterations=30,\n",
|
234 |
" binary_operators=[\"+\", \"*\"],\n",
|
235 |
" unary_operators=[\"cos\", \"exp\", \"sin\"],\n",
|
236 |
-
" **default_pysr_params
|
237 |
")\n",
|
238 |
"\n",
|
239 |
"model.fit(X, y)"
|
@@ -648,7 +576,7 @@
|
|
648 |
"outputs": [],
|
649 |
"source": [
|
650 |
"model = PySRRegressor(\n",
|
651 |
-
"
|
652 |
" niterations=20,\n",
|
653 |
" populations=20, # Use more populations\n",
|
654 |
" binary_operators=[\"+\", \"*\"],\n",
|
@@ -815,26 +743,7 @@
|
|
815 |
"where $p_i$ is the $i$th prime number, and $x$ is the input feature.\n",
|
816 |
"\n",
|
817 |
"Let's see if we can discover this using\n",
|
818 |
-
"the [Primes.jl](https://github.com/JuliaMath/Primes.jl) package
|
819 |
-
"\n",
|
820 |
-
"First, let's get the Julia backend\n",
|
821 |
-
"Here, we might choose to manually specify unlimited threads, `-O3`,\n",
|
822 |
-
"and `compile_modules=False`, although this will only propagate if Julia has not yet started:"
|
823 |
-
]
|
824 |
-
},
|
825 |
-
{
|
826 |
-
"cell_type": "code",
|
827 |
-
"execution_count": null,
|
828 |
-
"metadata": {
|
829 |
-
"id": "yUC4BMuHG-KN"
|
830 |
-
},
|
831 |
-
"outputs": [],
|
832 |
-
"source": [
|
833 |
-
"import pysr\n",
|
834 |
-
"\n",
|
835 |
-
"jl = pysr.julia_helpers.init_julia(\n",
|
836 |
-
" julia_kwargs=dict(optimize=3, threads=\"auto\", compiled_modules=False)\n",
|
837 |
-
")"
|
838 |
]
|
839 |
},
|
840 |
{
|
@@ -859,7 +768,9 @@
|
|
859 |
},
|
860 |
"outputs": [],
|
861 |
"source": [
|
862 |
-
"jl
|
|
|
|
|
863 |
" \"\"\"\n",
|
864 |
"import Pkg\n",
|
865 |
"Pkg.add(\"Primes\")\n",
|
@@ -885,7 +796,24 @@
|
|
885 |
},
|
886 |
"outputs": [],
|
887 |
"source": [
|
888 |
-
"jl.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
889 |
]
|
890 |
},
|
891 |
{
|
@@ -906,11 +834,11 @@
|
|
906 |
},
|
907 |
"outputs": [],
|
908 |
"source": [
|
909 |
-
"jl.
|
910 |
" \"\"\"\n",
|
911 |
"function p(i::T) where T\n",
|
912 |
" if 0.5 < i < 1000\n",
|
913 |
-
" return T(
|
914 |
" else\n",
|
915 |
" return T(NaN)\n",
|
916 |
" end\n",
|
@@ -919,6 +847,29 @@
|
|
919 |
")"
|
920 |
]
|
921 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
922 |
{
|
923 |
"cell_type": "markdown",
|
924 |
"metadata": {
|
@@ -947,7 +898,7 @@
|
|
947 |
"(However, note that this version assumes 64-bit float input, rather than any input type `T`)\n",
|
948 |
"\n",
|
949 |
"Next, let's generate a list of primes for our test dataset.\n",
|
950 |
-
"Since we are using
|
951 |
]
|
952 |
},
|
953 |
{
|
@@ -1382,7 +1333,7 @@
|
|
1382 |
"\n",
|
1383 |
"> **Warning**\n",
|
1384 |
">\n",
|
1385 |
-
"> First, let's save the data, because sometimes PyTorch and
|
1386 |
]
|
1387 |
},
|
1388 |
{
|
@@ -1413,7 +1364,7 @@
|
|
1413 |
"id": "krhaNlwFG-KT"
|
1414 |
},
|
1415 |
"source": [
|
1416 |
-
"We can now load the data, including after a crash (be sure to re-run the import cells at the top of this notebook, including the one that starts
|
1417 |
]
|
1418 |
},
|
1419 |
{
|
@@ -1467,7 +1418,7 @@
|
|
1467 |
"id": "1a738a33"
|
1468 |
},
|
1469 |
"source": [
|
1470 |
-
"If this segfaults, restart the notebook, and run the initial imports and
|
1471 |
]
|
1472 |
},
|
1473 |
{
|
|
|
15 |
"id": "tQ1r1bbb0yBv"
|
16 |
},
|
17 |
"source": [
|
|
|
18 |
"## Instructions\n",
|
19 |
"1. Work on a copy of this notebook: _File_ > _Save a copy in Drive_ (you will need a Google account).\n",
|
20 |
+
"2. (Optional) If you would like to do the deep learning component of this tutorial, turn on the GPU with Edit->Notebook settings->Hardware accelerator->GPU\n"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
]
|
22 |
},
|
23 |
{
|
|
|
32 |
},
|
33 |
"outputs": [],
|
34 |
"source": [
|
35 |
+
"!pip install -U pysr"
|
36 |
]
|
37 |
},
|
38 |
{
|
39 |
"cell_type": "markdown",
|
40 |
+
"metadata": {},
|
|
|
|
|
41 |
"source": [
|
42 |
+
"Julia and Julia dependencies are installed at first import:"
|
43 |
]
|
44 |
},
|
45 |
{
|
46 |
"cell_type": "code",
|
47 |
"execution_count": null,
|
48 |
+
"metadata": {},
|
|
|
|
|
49 |
"outputs": [],
|
50 |
"source": [
|
51 |
+
"import pysr"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
52 |
]
|
53 |
},
|
54 |
{
|
|
|
57 |
"id": "qeCPKd9wldEK"
|
58 |
},
|
59 |
"source": [
|
60 |
+
"Now, let's import everything else as well as the PySRRegressor:\n"
|
61 |
]
|
62 |
},
|
63 |
{
|
|
|
161 |
" niterations=30,\n",
|
162 |
" binary_operators=[\"+\", \"*\"],\n",
|
163 |
" unary_operators=[\"cos\", \"exp\", \"sin\"],\n",
|
164 |
+
" **default_pysr_params,\n",
|
165 |
")\n",
|
166 |
"\n",
|
167 |
"model.fit(X, y)"
|
|
|
576 |
"outputs": [],
|
577 |
"source": [
|
578 |
"model = PySRRegressor(\n",
|
579 |
+
" elementwise_loss=\"myloss(x, y, w) = w * abs(x - y)\", # Custom loss function with weights.\n",
|
580 |
" niterations=20,\n",
|
581 |
" populations=20, # Use more populations\n",
|
582 |
" binary_operators=[\"+\", \"*\"],\n",
|
|
|
743 |
"where $p_i$ is the $i$th prime number, and $x$ is the input feature.\n",
|
744 |
"\n",
|
745 |
"Let's see if we can discover this using\n",
|
746 |
+
"the [Primes.jl](https://github.com/JuliaMath/Primes.jl) package."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
747 |
]
|
748 |
},
|
749 |
{
|
|
|
768 |
},
|
769 |
"outputs": [],
|
770 |
"source": [
|
771 |
+
"from pysr import jl\n",
|
772 |
+
"\n",
|
773 |
+
"jl.seval(\n",
|
774 |
" \"\"\"\n",
|
775 |
"import Pkg\n",
|
776 |
"Pkg.add(\"Primes\")\n",
|
|
|
796 |
},
|
797 |
"outputs": [],
|
798 |
"source": [
|
799 |
+
"jl.seval(\"using Primes: prime\")"
|
800 |
+
]
|
801 |
+
},
|
802 |
+
{
|
803 |
+
"cell_type": "markdown",
|
804 |
+
"metadata": {},
|
805 |
+
"source": [
|
806 |
+
"Note that PySR should automatically load the `juliacall.ipython` extension for you,\n",
|
807 |
+
"which means that you can also execute Julia code in the notebook using the `%%julia` magic:"
|
808 |
+
]
|
809 |
+
},
|
810 |
+
{
|
811 |
+
"cell_type": "code",
|
812 |
+
"execution_count": null,
|
813 |
+
"metadata": {},
|
814 |
+
"outputs": [],
|
815 |
+
"source": [
|
816 |
+
"%julia using Primes: prime"
|
817 |
]
|
818 |
},
|
819 |
{
|
|
|
834 |
},
|
835 |
"outputs": [],
|
836 |
"source": [
|
837 |
+
"jl.seval(\n",
|
838 |
" \"\"\"\n",
|
839 |
"function p(i::T) where T\n",
|
840 |
" if 0.5 < i < 1000\n",
|
841 |
+
" return T(prime(round(Int, i)))\n",
|
842 |
" else\n",
|
843 |
" return T(NaN)\n",
|
844 |
" end\n",
|
|
|
847 |
")"
|
848 |
]
|
849 |
},
|
850 |
+
{
|
851 |
+
"cell_type": "markdown",
|
852 |
+
"metadata": {},
|
853 |
+
"source": [
|
854 |
+
"Or, equivalently:"
|
855 |
+
]
|
856 |
+
},
|
857 |
+
{
|
858 |
+
"cell_type": "code",
|
859 |
+
"execution_count": null,
|
860 |
+
"metadata": {},
|
861 |
+
"outputs": [],
|
862 |
+
"source": [
|
863 |
+
"%%julia\n",
|
864 |
+
"function p(i::T) where T\n",
|
865 |
+
" if 0.5 < i < 1000\n",
|
866 |
+
" return T(prime(round(Int, i)))\n",
|
867 |
+
" else\n",
|
868 |
+
" return T(NaN)\n",
|
869 |
+
" end\n",
|
870 |
+
"end"
|
871 |
+
]
|
872 |
+
},
|
873 |
{
|
874 |
"cell_type": "markdown",
|
875 |
"metadata": {
|
|
|
898 |
"(However, note that this version assumes 64-bit float input, rather than any input type `T`)\n",
|
899 |
"\n",
|
900 |
"Next, let's generate a list of primes for our test dataset.\n",
|
901 |
+
"Since we are using juliacall, we can just call `p` directly to do this:\n"
|
902 |
]
|
903 |
},
|
904 |
{
|
|
|
1333 |
"\n",
|
1334 |
"> **Warning**\n",
|
1335 |
">\n",
|
1336 |
+
"> First, let's save the data, because sometimes PyTorch and juliacall's C bindings interfere and cause the colab kernel to crash. If we need to restart, we can just load the data without having to retrain the network:"
|
1337 |
]
|
1338 |
},
|
1339 |
{
|
|
|
1364 |
"id": "krhaNlwFG-KT"
|
1365 |
},
|
1366 |
"source": [
|
1367 |
+
"We can now load the data, including after a crash (be sure to re-run the import cells at the top of this notebook, including the one that starts juliacall)."
|
1368 |
]
|
1369 |
},
|
1370 |
{
|
|
|
1418 |
"id": "1a738a33"
|
1419 |
},
|
1420 |
"source": [
|
1421 |
+
"If this segfaults, restart the notebook, and run the initial imports and juliacall part, but skip the PyTorch training. This is because PyTorch's C binding tends to interfere with juliacall. You can then re-run the `pkl.load` cell to import the data."
|
1422 |
]
|
1423 |
},
|
1424 |
{
|
pyproject.toml
CHANGED
@@ -1,2 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
[tool.isort]
|
2 |
profile = "black"
|
|
|
1 |
+
[build-system]
|
2 |
+
requires = ["setuptools"]
|
3 |
+
build-backend = "setuptools.build_meta"
|
4 |
+
|
5 |
+
[project]
|
6 |
+
name = "pysr"
|
7 |
+
version = "0.17.0"
|
8 |
+
authors = [
|
9 |
+
{name = "Miles Cranmer", email = "miles.cranmer@gmail.com"},
|
10 |
+
]
|
11 |
+
description = "Simple and efficient symbolic regression"
|
12 |
+
readme = {file = "README.md", content-type = "text/markdown"}
|
13 |
+
license = {file = "LICENSE"}
|
14 |
+
requires-python = ">=3.7"
|
15 |
+
classifiers = [
|
16 |
+
"Programming Language :: Python :: 3",
|
17 |
+
"Operating System :: OS Independent",
|
18 |
+
"License :: OSI Approved :: Apache Software License"
|
19 |
+
]
|
20 |
+
dynamic = ["dependencies"]
|
21 |
+
|
22 |
+
[tool.setuptools]
|
23 |
+
packages = ["pysr", "pysr._cli", "pysr.test"]
|
24 |
+
include-package-data = false
|
25 |
+
package-data = {pysr = ["juliapkg.json"]}
|
26 |
+
|
27 |
+
[tool.setuptools.dynamic]
|
28 |
+
dependencies = {file = "requirements.txt"}
|
29 |
+
|
30 |
[tool.isort]
|
31 |
profile = "black"
|
pysr/.gitignore
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
version.py
|
pysr/__init__.py
CHANGED
@@ -1,26 +1,23 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
warnings.warn(
|
6 |
-
"PySR experiences occassional segfaults with Python 3.12. "
|
7 |
-
+ "Please use an earlier version of Python with PySR until this issue is resolved."
|
8 |
-
)
|
9 |
|
10 |
from . import sklearn_monkeypatch
|
11 |
-
from .deprecated import best, best_callable, best_row, best_tex, pysr
|
12 |
from .export_jax import sympy2jax
|
13 |
from .export_torch import sympy2torch
|
14 |
-
from .feynman_problems import FeynmanProblem, Problem
|
15 |
-
from .julia_helpers import install
|
16 |
from .sr import PySRRegressor
|
|
|
|
|
17 |
from .version import __version__
|
18 |
|
19 |
__all__ = [
|
|
|
|
|
20 |
"sklearn_monkeypatch",
|
21 |
"sympy2jax",
|
22 |
"sympy2torch",
|
23 |
-
"FeynmanProblem",
|
24 |
"Problem",
|
25 |
"install",
|
26 |
"PySRRegressor",
|
|
|
1 |
+
# This must be imported as early as possible to prevent
|
2 |
+
# library linking issues caused by numpy/pytorch/etc. importing
|
3 |
+
# old libraries:
|
4 |
+
from .julia_import import jl, SymbolicRegression # isort:skip
|
|
|
|
|
|
|
|
|
5 |
|
6 |
from . import sklearn_monkeypatch
|
7 |
+
from .deprecated import best, best_callable, best_row, best_tex, install, pysr
|
8 |
from .export_jax import sympy2jax
|
9 |
from .export_torch import sympy2torch
|
|
|
|
|
10 |
from .sr import PySRRegressor
|
11 |
+
|
12 |
+
# This file is created by setuptools_scm during the build process:
|
13 |
from .version import __version__
|
14 |
|
15 |
__all__ = [
|
16 |
+
"jl",
|
17 |
+
"SymbolicRegression",
|
18 |
"sklearn_monkeypatch",
|
19 |
"sympy2jax",
|
20 |
"sympy2torch",
|
|
|
21 |
"Problem",
|
22 |
"install",
|
23 |
"PySRRegressor",
|
pysr/__main__.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
from
|
2 |
|
3 |
if __name__ == "__main__":
|
4 |
_cli(prog_name="pysr")
|
|
|
1 |
+
from ._cli.main import pysr as _cli
|
2 |
|
3 |
if __name__ == "__main__":
|
4 |
_cli(prog_name="pysr")
|
pysr/_cli/main.py
CHANGED
@@ -1,6 +1,17 @@
|
|
|
|
|
|
|
|
|
|
1 |
import click
|
2 |
|
3 |
-
from ..
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
|
5 |
|
6 |
@click.group("pysr")
|
@@ -9,15 +20,13 @@ def pysr(context):
|
|
9 |
ctx = context
|
10 |
|
11 |
|
12 |
-
@pysr.command("install", help="
|
13 |
@click.option(
|
14 |
"-p",
|
15 |
"julia_project",
|
16 |
"--project",
|
17 |
default=None,
|
18 |
type=str,
|
19 |
-
help="Install in a specific Julia project (e.g., a local copy of SymbolicRegression.jl).",
|
20 |
-
metavar="PROJECT_DIRECTORY",
|
21 |
)
|
22 |
@click.option("-q", "--quiet", is_flag=True, default=False, help="Disable logging.")
|
23 |
@click.option(
|
@@ -25,14 +34,55 @@ def pysr(context):
|
|
25 |
"precompile",
|
26 |
flag_value=True,
|
27 |
default=None,
|
28 |
-
help="Force precompilation of Julia libraries.",
|
29 |
)
|
30 |
@click.option(
|
31 |
"--no-precompile",
|
32 |
"precompile",
|
33 |
flag_value=False,
|
34 |
default=None,
|
35 |
-
help="Disable precompilation.",
|
36 |
)
|
37 |
def _install(julia_project, quiet, precompile):
|
38 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import sys
|
2 |
+
import unittest
|
3 |
+
import warnings
|
4 |
+
|
5 |
import click
|
6 |
|
7 |
+
from ..test import (
|
8 |
+
get_runtests_cli,
|
9 |
+
runtests,
|
10 |
+
runtests_dev,
|
11 |
+
runtests_jax,
|
12 |
+
runtests_startup,
|
13 |
+
runtests_torch,
|
14 |
+
)
|
15 |
|
16 |
|
17 |
@click.group("pysr")
|
|
|
20 |
ctx = context
|
21 |
|
22 |
|
23 |
+
@pysr.command("install", help="DEPRECATED (dependencies are now installed at import).")
|
24 |
@click.option(
|
25 |
"-p",
|
26 |
"julia_project",
|
27 |
"--project",
|
28 |
default=None,
|
29 |
type=str,
|
|
|
|
|
30 |
)
|
31 |
@click.option("-q", "--quiet", is_flag=True, default=False, help="Disable logging.")
|
32 |
@click.option(
|
|
|
34 |
"precompile",
|
35 |
flag_value=True,
|
36 |
default=None,
|
|
|
37 |
)
|
38 |
@click.option(
|
39 |
"--no-precompile",
|
40 |
"precompile",
|
41 |
flag_value=False,
|
42 |
default=None,
|
|
|
43 |
)
|
44 |
def _install(julia_project, quiet, precompile):
|
45 |
+
warnings.warn(
|
46 |
+
"This command is deprecated. Julia dependencies are now installed at first import."
|
47 |
+
)
|
48 |
+
|
49 |
+
|
50 |
+
TEST_OPTIONS = {"main", "jax", "torch", "cli", "dev", "startup"}
|
51 |
+
|
52 |
+
|
53 |
+
@pysr.command("test")
|
54 |
+
@click.argument("tests", nargs=1)
|
55 |
+
def _tests(tests):
|
56 |
+
"""Run parts of the PySR test suite.
|
57 |
+
|
58 |
+
Choose from main, jax, torch, cli, dev, and startup. You can give multiple tests, separated by commas.
|
59 |
+
"""
|
60 |
+
test_cases = []
|
61 |
+
for test in tests.split(","):
|
62 |
+
if test == "main":
|
63 |
+
test_cases.extend(runtests(just_tests=True))
|
64 |
+
elif test == "jax":
|
65 |
+
test_cases.extend(runtests_jax(just_tests=True))
|
66 |
+
elif test == "torch":
|
67 |
+
test_cases.extend(runtests_torch(just_tests=True))
|
68 |
+
elif test == "cli":
|
69 |
+
runtests_cli = get_runtests_cli()
|
70 |
+
test_cases.extend(runtests_cli(just_tests=True))
|
71 |
+
elif test == "dev":
|
72 |
+
test_cases.extend(runtests_dev(just_tests=True))
|
73 |
+
elif test == "startup":
|
74 |
+
test_cases.extend(runtests_startup(just_tests=True))
|
75 |
+
else:
|
76 |
+
warnings.warn(f"Invalid test {test}. Skipping.")
|
77 |
+
|
78 |
+
loader = unittest.TestLoader()
|
79 |
+
suite = unittest.TestSuite()
|
80 |
+
for test_case in test_cases:
|
81 |
+
suite.addTests(loader.loadTestsFromTestCase(test_case))
|
82 |
+
runner = unittest.TextTestRunner()
|
83 |
+
results = runner.run(suite)
|
84 |
+
# Normally unittest would run this, but here we have
|
85 |
+
# to do it manually to get the exit code.
|
86 |
+
|
87 |
+
if not results.wasSuccessful():
|
88 |
+
sys.exit(1)
|
pysr/deprecated.py
CHANGED
@@ -1,6 +1,27 @@
|
|
1 |
"""Various functions to deprecate features."""
|
2 |
import warnings
|
3 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
|
5 |
def pysr(X, y, weights=None, **kwargs): # pragma: no cover
|
6 |
from .sr import PySRRegressor
|
@@ -55,38 +76,28 @@ def best_callable(*args, **kwargs): # pragma: no cover
|
|
55 |
)
|
56 |
|
57 |
|
58 |
-
|
59 |
-
""
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
# Turn this into a dict:
|
84 |
-
deprecated_kwargs = {}
|
85 |
-
for line in deprecation_string.splitlines():
|
86 |
-
line = line.replace(" ", "")
|
87 |
-
if line == "":
|
88 |
-
continue
|
89 |
-
old, new = line.split("=>")
|
90 |
-
deprecated_kwargs[old] = new
|
91 |
-
|
92 |
-
return deprecated_kwargs
|
|
|
1 |
"""Various functions to deprecate features."""
|
2 |
import warnings
|
3 |
|
4 |
+
from .julia_import import jl
|
5 |
+
|
6 |
+
|
7 |
+
def install(*args, **kwargs):
|
8 |
+
del args, kwargs
|
9 |
+
warnings.warn(
|
10 |
+
"The `install` function has been removed. "
|
11 |
+
"PySR now uses the `juliacall` package to install its dependencies automatically at import time. ",
|
12 |
+
FutureWarning,
|
13 |
+
)
|
14 |
+
|
15 |
+
|
16 |
+
def init_julia(*args, **kwargs):
|
17 |
+
del args, kwargs
|
18 |
+
warnings.warn(
|
19 |
+
"The `init_julia` function has been removed. "
|
20 |
+
"Julia is now initialized automatically at import time.",
|
21 |
+
FutureWarning,
|
22 |
+
)
|
23 |
+
return jl
|
24 |
+
|
25 |
|
26 |
def pysr(X, y, weights=None, **kwargs): # pragma: no cover
|
27 |
from .sr import PySRRegressor
|
|
|
76 |
)
|
77 |
|
78 |
|
79 |
+
DEPRECATED_KWARGS = {
|
80 |
+
"fractionReplaced": "fraction_replaced",
|
81 |
+
"fractionReplacedHof": "fraction_replaced_hof",
|
82 |
+
"npop": "population_size",
|
83 |
+
"hofMigration": "hof_migration",
|
84 |
+
"shouldOptimizeConstants": "should_optimize_constants",
|
85 |
+
"weightAddNode": "weight_add_node",
|
86 |
+
"weightDeleteNode": "weight_delete_node",
|
87 |
+
"weightDoNothing": "weight_do_nothing",
|
88 |
+
"weightInsertNode": "weight_insert_node",
|
89 |
+
"weightMutateConstant": "weight_mutate_constant",
|
90 |
+
"weightMutateOperator": "weight_mutate_operator",
|
91 |
+
"weightSwapOperands": "weight_swap_operands",
|
92 |
+
"weightRandomize": "weight_randomize",
|
93 |
+
"weightSimplify": "weight_simplify",
|
94 |
+
"crossoverProbability": "crossover_probability",
|
95 |
+
"perturbationFactor": "perturbation_factor",
|
96 |
+
"batchSize": "batch_size",
|
97 |
+
"warmupMaxsizeBy": "warmup_maxsize_by",
|
98 |
+
"useFrequency": "use_frequency",
|
99 |
+
"useFrequencyInTournament": "use_frequency_in_tournament",
|
100 |
+
"ncyclesperiteration": "ncycles_per_iteration",
|
101 |
+
"loss": "elementwise_loss",
|
102 |
+
"full_objective": "loss_function",
|
103 |
+
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
pysr/feynman_problems.py
DELETED
@@ -1,176 +0,0 @@
|
|
1 |
-
import csv
|
2 |
-
from functools import partial
|
3 |
-
from pathlib import Path
|
4 |
-
|
5 |
-
import numpy as np
|
6 |
-
|
7 |
-
from .deprecated import best, pysr
|
8 |
-
|
9 |
-
PKG_DIR = Path(__file__).parents[1]
|
10 |
-
FEYNMAN_DATASET = PKG_DIR / "datasets" / "FeynmanEquations.csv"
|
11 |
-
|
12 |
-
|
13 |
-
class Problem:
|
14 |
-
"""
|
15 |
-
Problem API to work with PySR.
|
16 |
-
|
17 |
-
Has attributes: X, y as pysr accepts, form which is a string representing the correct equation and variable_names
|
18 |
-
|
19 |
-
Should be able to call pysr(problem.X, problem.y, var_names=problem.var_names) and have it work
|
20 |
-
"""
|
21 |
-
|
22 |
-
def __init__(self, X, y, form=None, variable_names=None):
|
23 |
-
self.X = X
|
24 |
-
self.y = y
|
25 |
-
self.form = form
|
26 |
-
self.variable_names = variable_names
|
27 |
-
|
28 |
-
|
29 |
-
class FeynmanProblem(Problem):
|
30 |
-
"""
|
31 |
-
Stores the data for the problems from the 100 Feynman Equations on Physics.
|
32 |
-
This is the benchmark used in the AI Feynman Paper
|
33 |
-
"""
|
34 |
-
|
35 |
-
def __init__(self, row, gen=False, dp=500):
|
36 |
-
"""
|
37 |
-
row: a row read as a dict from the FeynmanEquations dataset provided in the datasets folder of the repo
|
38 |
-
gen: If true the problem will have dp X and y values randomly generated else they will be None
|
39 |
-
"""
|
40 |
-
self.eq_id = row["Filename"]
|
41 |
-
self.n_vars = int(row["# variables"])
|
42 |
-
super(FeynmanProblem, self).__init__(
|
43 |
-
None,
|
44 |
-
None,
|
45 |
-
form=row["Formula"],
|
46 |
-
variable_names=[row[f"v{i + 1}_name"] for i in range(self.n_vars)],
|
47 |
-
)
|
48 |
-
self.low = [float(row[f"v{i+1}_low"]) for i in range(self.n_vars)]
|
49 |
-
self.high = [float(row[f"v{i+1}_high"]) for i in range(self.n_vars)]
|
50 |
-
self.dp = dp
|
51 |
-
if gen:
|
52 |
-
self.X = np.random.uniform(0.01, 25, size=(self.dp, self.n_vars))
|
53 |
-
d = {}
|
54 |
-
for var in range(len(self.variable_names)):
|
55 |
-
d[self.variable_names[var]] = self.X[:, var]
|
56 |
-
d["exp"] = np.exp
|
57 |
-
d["sqrt"] = np.sqrt
|
58 |
-
d["pi"] = np.pi
|
59 |
-
d["cos"] = np.cos
|
60 |
-
d["sin"] = np.sin
|
61 |
-
d["tan"] = np.tan
|
62 |
-
d["tanh"] = np.tanh
|
63 |
-
d["ln"] = np.log
|
64 |
-
d["log"] = np.log # Quite sure the Feynman dataset has no base 10 logs
|
65 |
-
d["arcsin"] = np.arcsin
|
66 |
-
self.y = eval(self.form, d)
|
67 |
-
|
68 |
-
def __str__(self):
|
69 |
-
return f"Feynman Equation: {self.eq_id}|Form: {self.form}"
|
70 |
-
|
71 |
-
def __repr__(self):
|
72 |
-
return str(self)
|
73 |
-
|
74 |
-
|
75 |
-
def mk_problems(first=100, gen=False, dp=500, data_dir=FEYNMAN_DATASET):
|
76 |
-
"""
|
77 |
-
|
78 |
-
first: the first "first" equations from the dataset will be made into problems
|
79 |
-
data_dir: the path pointing to the Feynman Equations csv
|
80 |
-
returns: list of FeynmanProblems
|
81 |
-
"""
|
82 |
-
ret = []
|
83 |
-
with open(data_dir) as csvfile:
|
84 |
-
reader = csv.DictReader(csvfile)
|
85 |
-
for i, row in enumerate(reader):
|
86 |
-
if i > first:
|
87 |
-
break
|
88 |
-
if row["Filename"] == "":
|
89 |
-
continue
|
90 |
-
p = FeynmanProblem(row, gen=gen, dp=dp)
|
91 |
-
ret.append(p)
|
92 |
-
return ret
|
93 |
-
|
94 |
-
|
95 |
-
def run_on_problem(problem, verbosity=0, multiprocessing=True):
|
96 |
-
"""
|
97 |
-
Takes in a problem and returns a tuple: (equations, best predicted equation, actual equation)
|
98 |
-
"""
|
99 |
-
from time import time
|
100 |
-
|
101 |
-
starting = time()
|
102 |
-
equations = pysr(
|
103 |
-
problem.X,
|
104 |
-
problem.y,
|
105 |
-
variable_names=problem.variable_names,
|
106 |
-
verbosity=verbosity,
|
107 |
-
)
|
108 |
-
timing = time() - starting
|
109 |
-
others = {"time": timing, "problem": problem}
|
110 |
-
if not multiprocessing:
|
111 |
-
others["equations"] = equations
|
112 |
-
return str(best(equations)), problem.form, others
|
113 |
-
|
114 |
-
|
115 |
-
def do_feynman_experiments_parallel(
|
116 |
-
first=100,
|
117 |
-
verbosity=0,
|
118 |
-
dp=500,
|
119 |
-
output_file_path="FeynmanExperiment.csv",
|
120 |
-
data_dir=FEYNMAN_DATASET,
|
121 |
-
):
|
122 |
-
import multiprocessing as mp
|
123 |
-
|
124 |
-
from tqdm import tqdm
|
125 |
-
|
126 |
-
problems = mk_problems(first=first, gen=True, dp=dp, data_dir=data_dir)
|
127 |
-
ids = []
|
128 |
-
predictions = []
|
129 |
-
true_equations = []
|
130 |
-
time_takens = []
|
131 |
-
pool = mp.Pool()
|
132 |
-
results = []
|
133 |
-
with tqdm(total=len(problems)) as pbar:
|
134 |
-
f = partial(run_on_problem, verbosity=verbosity)
|
135 |
-
for i, res in enumerate(pool.imap(f, problems)):
|
136 |
-
results.append(res)
|
137 |
-
pbar.update()
|
138 |
-
for res in results:
|
139 |
-
prediction, true_equation, others = res
|
140 |
-
problem = others["problem"]
|
141 |
-
ids.append(problem.eq_id)
|
142 |
-
predictions.append(prediction)
|
143 |
-
true_equations.append(true_equation)
|
144 |
-
time_takens.append(others["time"])
|
145 |
-
with open(output_file_path, "a") as f:
|
146 |
-
writer = csv.writer(f, delimiter=",")
|
147 |
-
writer.writerow(["ID", "Predicted", "True", "Time"])
|
148 |
-
for i in range(len(ids)):
|
149 |
-
writer.writerow([ids[i], predictions[i], true_equations[i], time_takens[i]])
|
150 |
-
|
151 |
-
|
152 |
-
def do_feynman_experiments(
|
153 |
-
first=100,
|
154 |
-
verbosity=0,
|
155 |
-
dp=500,
|
156 |
-
output_file_path="FeynmanExperiment.csv",
|
157 |
-
data_dir=FEYNMAN_DATASET,
|
158 |
-
):
|
159 |
-
from tqdm import tqdm
|
160 |
-
|
161 |
-
problems = mk_problems(first=first, gen=True, dp=dp, data_dir=data_dir)
|
162 |
-
ids = []
|
163 |
-
predictions = []
|
164 |
-
true_equations = []
|
165 |
-
time_takens = []
|
166 |
-
for problem in tqdm(problems):
|
167 |
-
prediction, true_equation, others = run_on_problem(problem, verbosity)
|
168 |
-
ids.append(problem.eq_id)
|
169 |
-
predictions.append(prediction)
|
170 |
-
true_equations.append(true_equation)
|
171 |
-
time_takens.append(others["time"])
|
172 |
-
with open(output_file_path, "a") as f:
|
173 |
-
writer = csv.writer(f, delimiter=",")
|
174 |
-
writer.writerow(["ID", "Predicted", "True", "Time"])
|
175 |
-
for i in range(len(ids)):
|
176 |
-
writer.writerow([ids[i], predictions[i], true_equations[i], time_takens[i]])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
pysr/julia_helpers.py
CHANGED
@@ -1,284 +1,18 @@
|
|
1 |
"""Functions for initializing the Julia environment and installing deps."""
|
2 |
-
import os
|
3 |
-
import subprocess
|
4 |
-
import sys
|
5 |
-
import warnings
|
6 |
-
from pathlib import Path
|
7 |
|
8 |
-
|
|
|
9 |
|
10 |
-
from .
|
|
|
11 |
|
12 |
-
|
13 |
-
|
14 |
-
julia_kwargs_at_initialization = None
|
15 |
-
julia_activated_env = None
|
16 |
|
|
|
|
|
17 |
|
18 |
-
|
19 |
-
"""Execute julia.core.JuliaInfo.load(), and store as juliainfo."""
|
20 |
-
global juliainfo
|
21 |
-
|
22 |
-
if juliainfo is None:
|
23 |
-
from julia.core import JuliaInfo
|
24 |
-
|
25 |
-
try:
|
26 |
-
juliainfo = JuliaInfo.load(julia="julia")
|
27 |
-
except FileNotFoundError:
|
28 |
-
env_path = os.environ["PATH"]
|
29 |
-
raise FileNotFoundError(
|
30 |
-
f"Julia is not installed in your PATH. Please install Julia and add it to your PATH.\n\nCurrent PATH: {env_path}",
|
31 |
-
)
|
32 |
-
|
33 |
-
return juliainfo
|
34 |
-
|
35 |
-
|
36 |
-
def _get_julia_env_dir():
|
37 |
-
# Have to manually get env dir:
|
38 |
-
try:
|
39 |
-
julia_env_dir_str = subprocess.run(
|
40 |
-
["julia", "-e using Pkg; print(Pkg.envdir())"],
|
41 |
-
capture_output=True,
|
42 |
-
env=os.environ,
|
43 |
-
).stdout.decode()
|
44 |
-
except FileNotFoundError:
|
45 |
-
env_path = os.environ["PATH"]
|
46 |
-
raise FileNotFoundError(
|
47 |
-
f"Julia is not installed in your PATH. Please install Julia and add it to your PATH.\n\nCurrent PATH: {env_path}",
|
48 |
-
)
|
49 |
-
return Path(julia_env_dir_str)
|
50 |
-
|
51 |
-
|
52 |
-
def _set_julia_project_env(julia_project, is_shared):
|
53 |
-
if is_shared:
|
54 |
-
if is_julia_version_greater_eq(version=(1, 7, 0)):
|
55 |
-
os.environ["JULIA_PROJECT"] = "@" + str(julia_project)
|
56 |
-
else:
|
57 |
-
julia_env_dir = _get_julia_env_dir()
|
58 |
-
os.environ["JULIA_PROJECT"] = str(julia_env_dir / julia_project)
|
59 |
-
else:
|
60 |
-
os.environ["JULIA_PROJECT"] = str(julia_project)
|
61 |
-
|
62 |
-
|
63 |
-
def _get_io_arg(quiet):
|
64 |
-
io = "devnull" if quiet else "stderr"
|
65 |
-
io_arg = f"io={io}" if is_julia_version_greater_eq(version=(1, 6, 0)) else ""
|
66 |
-
return io_arg
|
67 |
-
|
68 |
-
|
69 |
-
def install(julia_project=None, quiet=False, precompile=None): # pragma: no cover
|
70 |
-
"""
|
71 |
-
Install PyCall.jl and all required dependencies for SymbolicRegression.jl.
|
72 |
-
|
73 |
-
Also updates the local Julia registry.
|
74 |
-
"""
|
75 |
-
import julia
|
76 |
-
|
77 |
-
_julia_version_assertion()
|
78 |
-
# Set JULIA_PROJECT so that we install in the pysr environment
|
79 |
-
processed_julia_project, is_shared = _process_julia_project(julia_project)
|
80 |
-
_set_julia_project_env(processed_julia_project, is_shared)
|
81 |
-
|
82 |
-
if precompile == False:
|
83 |
-
os.environ["JULIA_PKG_PRECOMPILE_AUTO"] = "0"
|
84 |
-
|
85 |
-
try:
|
86 |
-
julia.install(quiet=quiet)
|
87 |
-
except julia.tools.PyCallInstallError:
|
88 |
-
# Attempt to reset PyCall.jl's build:
|
89 |
-
subprocess.run(
|
90 |
-
[
|
91 |
-
"julia",
|
92 |
-
"-e",
|
93 |
-
f'ENV["PYTHON"] = "{sys.executable}"; import Pkg; Pkg.build("PyCall")',
|
94 |
-
],
|
95 |
-
)
|
96 |
-
# Try installing again:
|
97 |
-
try:
|
98 |
-
julia.install(quiet=quiet)
|
99 |
-
except julia.tools.PyCallInstallError:
|
100 |
-
warnings.warn(
|
101 |
-
"PyCall.jl failed to install on second attempt. "
|
102 |
-
+ "Please consult the GitHub issue "
|
103 |
-
+ "https://github.com/MilesCranmer/PySR/issues/257 "
|
104 |
-
+ "for advice on fixing this."
|
105 |
-
)
|
106 |
-
|
107 |
-
Main, init_log = init_julia(julia_project, quiet=quiet, return_aux=True)
|
108 |
-
io_arg = _get_io_arg(quiet)
|
109 |
-
|
110 |
-
if precompile is None:
|
111 |
-
precompile = init_log["compiled_modules"]
|
112 |
-
|
113 |
-
if not precompile:
|
114 |
-
Main.eval('ENV["JULIA_PKG_PRECOMPILE_AUTO"] = 0')
|
115 |
-
|
116 |
-
if is_shared:
|
117 |
-
# Install SymbolicRegression.jl:
|
118 |
-
_add_sr_to_julia_project(Main, io_arg)
|
119 |
-
|
120 |
-
Main.eval("using Pkg")
|
121 |
-
Main.eval(f"Pkg.instantiate({io_arg})")
|
122 |
-
|
123 |
-
if precompile:
|
124 |
-
Main.eval(f"Pkg.precompile({io_arg})")
|
125 |
-
|
126 |
-
if not quiet:
|
127 |
-
warnings.warn(
|
128 |
-
"It is recommended to restart Python after installing PySR's dependencies,"
|
129 |
-
" so that the Julia environment is properly initialized."
|
130 |
-
)
|
131 |
-
|
132 |
-
|
133 |
-
def _import_error():
|
134 |
-
return """
|
135 |
-
Required dependencies are not installed or built. Run the following command in your terminal:
|
136 |
-
python3 -m pysr install
|
137 |
-
"""
|
138 |
-
|
139 |
-
|
140 |
-
def _process_julia_project(julia_project):
|
141 |
-
if julia_project is None:
|
142 |
-
is_shared = True
|
143 |
-
processed_julia_project = f"pysr-{__version__}"
|
144 |
-
elif julia_project[0] == "@":
|
145 |
-
is_shared = True
|
146 |
-
processed_julia_project = julia_project[1:]
|
147 |
-
else:
|
148 |
-
is_shared = False
|
149 |
-
processed_julia_project = Path(julia_project)
|
150 |
-
return processed_julia_project, is_shared
|
151 |
-
|
152 |
-
|
153 |
-
def is_julia_version_greater_eq(juliainfo=None, version=(1, 6, 0)):
|
154 |
-
"""Check if Julia version is greater than specified version."""
|
155 |
-
if juliainfo is None:
|
156 |
-
juliainfo = _load_juliainfo()
|
157 |
-
current_version = (
|
158 |
-
juliainfo.version_major,
|
159 |
-
juliainfo.version_minor,
|
160 |
-
juliainfo.version_patch,
|
161 |
-
)
|
162 |
-
return current_version >= version
|
163 |
-
|
164 |
-
|
165 |
-
def _check_for_conflicting_libraries(): # pragma: no cover
|
166 |
-
"""Check whether there are conflicting modules, and display warnings."""
|
167 |
-
# See https://github.com/pytorch/pytorch/issues/78829: importing
|
168 |
-
# pytorch before running `pysr.fit` causes a segfault.
|
169 |
-
torch_is_loaded = "torch" in sys.modules
|
170 |
-
if torch_is_loaded:
|
171 |
-
warnings.warn(
|
172 |
-
"`torch` was loaded before the Julia instance started. "
|
173 |
-
"This may cause a segfault when running `PySRRegressor.fit`. "
|
174 |
-
"To avoid this, please run `pysr.julia_helpers.init_julia()` *before* "
|
175 |
-
"importing `torch`. "
|
176 |
-
"For updates, see https://github.com/pytorch/pytorch/issues/78829"
|
177 |
-
)
|
178 |
-
|
179 |
-
|
180 |
-
def init_julia(julia_project=None, quiet=False, julia_kwargs=None, return_aux=False):
|
181 |
-
"""Initialize julia binary, turning off compiled modules if needed."""
|
182 |
-
global julia_initialized
|
183 |
-
global julia_kwargs_at_initialization
|
184 |
-
global julia_activated_env
|
185 |
-
|
186 |
-
if not julia_initialized:
|
187 |
-
_check_for_conflicting_libraries()
|
188 |
-
|
189 |
-
if julia_kwargs is None:
|
190 |
-
julia_kwargs = {"optimize": 3}
|
191 |
-
|
192 |
-
from julia.core import JuliaInfo, UnsupportedPythonError
|
193 |
-
|
194 |
-
_julia_version_assertion()
|
195 |
-
processed_julia_project, is_shared = _process_julia_project(julia_project)
|
196 |
-
_set_julia_project_env(processed_julia_project, is_shared)
|
197 |
-
|
198 |
-
try:
|
199 |
-
info = JuliaInfo.load(julia="julia")
|
200 |
-
except FileNotFoundError:
|
201 |
-
env_path = os.environ["PATH"]
|
202 |
-
raise FileNotFoundError(
|
203 |
-
f"Julia is not installed in your PATH. Please install Julia and add it to your PATH.\n\nCurrent PATH: {env_path}",
|
204 |
-
)
|
205 |
-
|
206 |
-
if not info.is_pycall_built():
|
207 |
-
raise ImportError(_import_error())
|
208 |
-
|
209 |
-
from julia.core import Julia
|
210 |
-
|
211 |
-
try:
|
212 |
-
Julia(**julia_kwargs)
|
213 |
-
except UnsupportedPythonError:
|
214 |
-
# Static python binary, so we turn off pre-compiled modules.
|
215 |
-
julia_kwargs = {**julia_kwargs, "compiled_modules": False}
|
216 |
-
Julia(**julia_kwargs)
|
217 |
-
warnings.warn(
|
218 |
-
"Your system's Python library is static (e.g., conda), so precompilation will be turned off. For a dynamic library, try using `pyenv` and installing with `--enable-shared`: https://github.com/pyenv/pyenv/blob/master/plugins/python-build/README.md#building-with---enable-shared."
|
219 |
-
)
|
220 |
-
|
221 |
-
using_compiled_modules = (not "compiled_modules" in julia_kwargs) or julia_kwargs[
|
222 |
-
"compiled_modules"
|
223 |
-
]
|
224 |
-
|
225 |
-
from julia import Main as _Main
|
226 |
-
|
227 |
-
Main = _Main
|
228 |
-
|
229 |
-
if julia_activated_env is None:
|
230 |
-
julia_activated_env = processed_julia_project
|
231 |
-
|
232 |
-
if julia_initialized and julia_kwargs_at_initialization is not None:
|
233 |
-
# Check if the kwargs are the same as the previous initialization
|
234 |
-
init_set = set(julia_kwargs_at_initialization.items())
|
235 |
-
new_set = set(julia_kwargs.items())
|
236 |
-
set_diff = new_set - init_set
|
237 |
-
# Remove the `compiled_modules` key, since it is not a user-specified kwarg:
|
238 |
-
set_diff = {k: v for k, v in set_diff if k != "compiled_modules"}
|
239 |
-
if len(set_diff) > 0:
|
240 |
-
warnings.warn(
|
241 |
-
"Julia has already started. The new Julia options "
|
242 |
-
+ str(set_diff)
|
243 |
-
+ " will be ignored."
|
244 |
-
)
|
245 |
-
|
246 |
-
if julia_initialized and julia_activated_env != processed_julia_project:
|
247 |
-
Main.eval("using Pkg")
|
248 |
-
|
249 |
-
io_arg = _get_io_arg(quiet)
|
250 |
-
# Can't pass IO to Julia call as it evaluates to PyObject, so just directly
|
251 |
-
# use Main.eval:
|
252 |
-
Main.eval(
|
253 |
-
f'Pkg.activate("{_escape_filename(processed_julia_project)}",'
|
254 |
-
f"shared = Bool({int(is_shared)}), "
|
255 |
-
f"{io_arg})"
|
256 |
-
)
|
257 |
-
|
258 |
-
julia_activated_env = processed_julia_project
|
259 |
-
|
260 |
-
if not julia_initialized:
|
261 |
-
julia_kwargs_at_initialization = julia_kwargs
|
262 |
-
|
263 |
-
julia_initialized = True
|
264 |
-
if return_aux:
|
265 |
-
return Main, {"compiled_modules": using_compiled_modules}
|
266 |
-
return Main
|
267 |
-
|
268 |
-
|
269 |
-
def _add_sr_to_julia_project(Main, io_arg):
|
270 |
-
Main.eval("using Pkg")
|
271 |
-
Main.eval("Pkg.Registry.update()")
|
272 |
-
Main.sr_spec = Main.PackageSpec(
|
273 |
-
name="SymbolicRegression",
|
274 |
-
url="https://github.com/MilesCranmer/SymbolicRegression.jl",
|
275 |
-
rev="v" + __symbolic_regression_jl_version__,
|
276 |
-
)
|
277 |
-
Main.clustermanagers_spec = Main.PackageSpec(
|
278 |
-
name="ClusterManagers",
|
279 |
-
version="0.4",
|
280 |
-
)
|
281 |
-
Main.eval(f"Pkg.add([sr_spec, clustermanagers_spec], {io_arg})")
|
282 |
|
283 |
|
284 |
def _escape_filename(filename):
|
@@ -288,60 +22,27 @@ def _escape_filename(filename):
|
|
288 |
return str_repr
|
289 |
|
290 |
|
291 |
-
def
|
292 |
-
|
293 |
-
|
294 |
-
"PySR requires Julia 1.6.0 or greater. "
|
295 |
-
"Please update your Julia installation."
|
296 |
-
)
|
297 |
-
|
298 |
-
|
299 |
-
def _backend_version_assertion(Main):
|
300 |
-
try:
|
301 |
-
backend_version = Main.eval("string(SymbolicRegression.PACKAGE_VERSION)")
|
302 |
-
expected_backend_version = __symbolic_regression_jl_version__
|
303 |
-
if backend_version != expected_backend_version: # pragma: no cover
|
304 |
-
warnings.warn(
|
305 |
-
f"PySR backend (SymbolicRegression.jl) version {backend_version} "
|
306 |
-
f"does not match expected version {expected_backend_version}. "
|
307 |
-
"Things may break. "
|
308 |
-
"Please update your PySR installation with "
|
309 |
-
"`python3 -m pysr install`."
|
310 |
-
)
|
311 |
-
except JuliaError: # pragma: no cover
|
312 |
-
warnings.warn(
|
313 |
-
"You seem to have an outdated version of SymbolicRegression.jl. "
|
314 |
-
"Things may break. "
|
315 |
-
"Please update your PySR installation with "
|
316 |
-
"`python3 -m pysr install`."
|
317 |
-
)
|
318 |
-
|
319 |
-
|
320 |
-
def _load_cluster_manager(Main, cluster_manager):
|
321 |
-
Main.eval(f"import ClusterManagers: addprocs_{cluster_manager}")
|
322 |
-
return Main.eval(f"addprocs_{cluster_manager}")
|
323 |
-
|
324 |
|
325 |
-
def _update_julia_project(Main, is_shared, io_arg):
|
326 |
-
try:
|
327 |
-
if is_shared:
|
328 |
-
_add_sr_to_julia_project(Main, io_arg)
|
329 |
-
Main.eval("using Pkg")
|
330 |
-
Main.eval(f"Pkg.resolve({io_arg})")
|
331 |
-
except (JuliaError, RuntimeError) as e:
|
332 |
-
raise ImportError(_import_error()) from e
|
333 |
|
|
|
|
|
|
|
|
|
334 |
|
335 |
-
def _load_backend(Main):
|
336 |
-
try:
|
337 |
-
# Load namespace, so that various internal operators work:
|
338 |
-
Main.eval("using SymbolicRegression")
|
339 |
-
except (JuliaError, RuntimeError) as e:
|
340 |
-
raise ImportError(_import_error()) from e
|
341 |
|
342 |
-
|
|
|
|
|
|
|
343 |
|
344 |
-
# Load Julia package SymbolicRegression.jl
|
345 |
-
from julia import SymbolicRegression
|
346 |
|
347 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
"""Functions for initializing the Julia environment and installing deps."""
|
|
|
|
|
|
|
|
|
|
|
2 |
|
3 |
+
import numpy as np
|
4 |
+
from juliacall import convert as jl_convert # type: ignore
|
5 |
|
6 |
+
from .deprecated import init_julia, install
|
7 |
+
from .julia_import import jl
|
8 |
|
9 |
+
jl.seval("using Serialization: Serialization")
|
10 |
+
jl.seval("using PythonCall: PythonCall")
|
|
|
|
|
11 |
|
12 |
+
Serialization = jl.Serialization
|
13 |
+
PythonCall = jl.PythonCall
|
14 |
|
15 |
+
jl.seval("using SymbolicRegression: plus, sub, mult, div, pow")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
|
17 |
|
18 |
def _escape_filename(filename):
|
|
|
22 |
return str_repr
|
23 |
|
24 |
|
25 |
+
def _load_cluster_manager(cluster_manager):
|
26 |
+
jl.seval(f"using ClusterManagers: addprocs_{cluster_manager}")
|
27 |
+
return jl.seval(f"addprocs_{cluster_manager}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
|
30 |
+
def jl_array(x):
|
31 |
+
if x is None:
|
32 |
+
return None
|
33 |
+
return jl_convert(jl.Array, x)
|
34 |
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
|
36 |
+
def jl_serialize(obj):
|
37 |
+
buf = jl.IOBuffer()
|
38 |
+
Serialization.serialize(buf, obj)
|
39 |
+
return np.array(jl.take_b(buf))
|
40 |
|
|
|
|
|
41 |
|
42 |
+
def jl_deserialize(s):
|
43 |
+
if s is None:
|
44 |
+
return s
|
45 |
+
buf = jl.IOBuffer()
|
46 |
+
jl.write(buf, jl_array(s))
|
47 |
+
jl.seekstart(buf)
|
48 |
+
return Serialization.deserialize(buf)
|
pysr/julia_import.py
ADDED
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import sys
|
3 |
+
import warnings
|
4 |
+
|
5 |
+
# Check if JuliaCall is already loaded, and if so, warn the user
|
6 |
+
# about the relevant environment variables. If not loaded,
|
7 |
+
# set up sensible defaults.
|
8 |
+
if "juliacall" in sys.modules:
|
9 |
+
warnings.warn(
|
10 |
+
"juliacall module already imported. "
|
11 |
+
"Make sure that you have set the environment variable `PYTHON_JULIACALL_HANDLE_SIGNALS=yes` to avoid segfaults. "
|
12 |
+
"Also note that PySR will not be able to configure `PYTHON_JULIACALL_THREADS` or `PYTHON_JULIACALL_OPTLEVEL` for you."
|
13 |
+
)
|
14 |
+
else:
|
15 |
+
# Required to avoid segfaults (https://juliapy.github.io/PythonCall.jl/dev/faq/)
|
16 |
+
if os.environ.get("PYTHON_JULIACALL_HANDLE_SIGNALS", "yes") != "yes":
|
17 |
+
warnings.warn(
|
18 |
+
"PYTHON_JULIACALL_HANDLE_SIGNALS environment variable is set to something other than 'yes' or ''. "
|
19 |
+
+ "You will experience segfaults if running with multithreading."
|
20 |
+
)
|
21 |
+
|
22 |
+
if os.environ.get("PYTHON_JULIACALL_THREADS", "auto") != "auto":
|
23 |
+
warnings.warn(
|
24 |
+
"PYTHON_JULIACALL_THREADS environment variable is set to something other than 'auto', "
|
25 |
+
"so PySR was not able to set it. You may wish to set it to `'auto'` for full use "
|
26 |
+
"of your CPU."
|
27 |
+
)
|
28 |
+
|
29 |
+
# TODO: Remove these when juliapkg lets you specify this
|
30 |
+
for k, default in (
|
31 |
+
("PYTHON_JULIACALL_HANDLE_SIGNALS", "yes"),
|
32 |
+
("PYTHON_JULIACALL_THREADS", "auto"),
|
33 |
+
("PYTHON_JULIACALL_OPTLEVEL", "3"),
|
34 |
+
):
|
35 |
+
os.environ[k] = os.environ.get(k, default)
|
36 |
+
|
37 |
+
|
38 |
+
from juliacall import Main as jl # type: ignore
|
39 |
+
|
40 |
+
# Overwrite the seval function to use Meta.parseall
|
41 |
+
# instead of Meta.parse.
|
42 |
+
jl.seval("using PythonCall: PythonCall, Py, pyconvert")
|
43 |
+
jl.seval(
|
44 |
+
"""function PythonCall.pyjlmodule_seval(self::Module, expr::Py)
|
45 |
+
e = Meta.parseall(strip(pyconvert(String, expr)))
|
46 |
+
Py(Base.eval(self, e))
|
47 |
+
end"""
|
48 |
+
)
|
49 |
+
# ^TODO: Overwrite this once PythonCall.jl is updated:
|
50 |
+
|
51 |
+
jl_version = (jl.VERSION.major, jl.VERSION.minor, jl.VERSION.patch)
|
52 |
+
|
53 |
+
# Next, automatically load the juliacall extension if we're in a Jupyter notebook
|
54 |
+
autoload_extensions = os.environ.get("PYSR_AUTOLOAD_EXTENSIONS", "yes")
|
55 |
+
if autoload_extensions in {"yes", ""} and jl_version >= (1, 9, 0):
|
56 |
+
try:
|
57 |
+
get_ipython = sys.modules["IPython"].get_ipython
|
58 |
+
|
59 |
+
if "IPKernelApp" not in get_ipython().config:
|
60 |
+
raise ImportError("console")
|
61 |
+
|
62 |
+
print(
|
63 |
+
"Detected Jupyter notebook. Loading juliacall extension. Set `PYSR_AUTOLOAD_EXTENSIONS=no` to disable."
|
64 |
+
)
|
65 |
+
|
66 |
+
# TODO: Turn this off if juliacall does this automatically
|
67 |
+
get_ipython().run_line_magic("load_ext", "juliacall")
|
68 |
+
except Exception:
|
69 |
+
pass
|
70 |
+
elif autoload_extensions not in {"no", "yes", ""}:
|
71 |
+
warnings.warn(
|
72 |
+
"PYSR_AUTOLOAD_EXTENSIONS environment variable is set to something other than 'yes' or 'no' or ''."
|
73 |
+
)
|
74 |
+
|
75 |
+
jl.seval("using SymbolicRegression")
|
76 |
+
SymbolicRegression = jl.SymbolicRegression
|
pysr/juliapkg.json
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"julia": "1.6",
|
3 |
+
"packages": {
|
4 |
+
"SymbolicRegression": {
|
5 |
+
"uuid": "8254be44-1295-4e6a-a16d-46603ac705cb",
|
6 |
+
"version": "=0.23.1"
|
7 |
+
},
|
8 |
+
"ClusterManagers": {
|
9 |
+
"uuid": "34f1f09b-3a8b-5176-ab39-66d58a4d544e",
|
10 |
+
"version": "0.4"
|
11 |
+
},
|
12 |
+
"Serialization": {
|
13 |
+
"uuid": "9e88b42a-f829-5b0c-bbe9-9e923198166b",
|
14 |
+
"version": "1"
|
15 |
+
},
|
16 |
+
"Zygote": {
|
17 |
+
"uuid": "e88e6eb3-aa80-5325-afca-941959d7151f",
|
18 |
+
"version": "0.6"
|
19 |
+
}
|
20 |
+
}
|
21 |
+
}
|
pysr/param_groupings.yml
CHANGED
@@ -8,10 +8,10 @@
|
|
8 |
- niterations
|
9 |
- populations
|
10 |
- population_size
|
11 |
-
-
|
12 |
- The Objective:
|
13 |
-
-
|
14 |
-
-
|
15 |
- model_selection
|
16 |
- dimensional_constraint_penalty
|
17 |
- Working with Complexities:
|
@@ -88,9 +88,7 @@
|
|
88 |
- temp_equation_file
|
89 |
- tempdir
|
90 |
- delete_tempfiles
|
91 |
-
- julia_project
|
92 |
- update
|
93 |
-
- julia_kwargs
|
94 |
- Exporting the Results:
|
95 |
- equation_file
|
96 |
- output_jax_format
|
|
|
8 |
- niterations
|
9 |
- populations
|
10 |
- population_size
|
11 |
+
- ncycles_per_iteration
|
12 |
- The Objective:
|
13 |
+
- elementwise_loss
|
14 |
+
- loss_function
|
15 |
- model_selection
|
16 |
- dimensional_constraint_penalty
|
17 |
- Working with Complexities:
|
|
|
88 |
- temp_equation_file
|
89 |
- tempdir
|
90 |
- delete_tempfiles
|
|
|
91 |
- update
|
|
|
92 |
- Exporting the Results:
|
93 |
- equation_file
|
94 |
- output_jax_format
|
pysr/sklearn_monkeypatch.py
CHANGED
@@ -9,5 +9,5 @@ def _ensure_no_complex_data(*args, **kwargs):
|
|
9 |
|
10 |
try:
|
11 |
validation._ensure_no_complex_data = _ensure_no_complex_data
|
12 |
-
except AttributeError:
|
13 |
...
|
|
|
9 |
|
10 |
try:
|
11 |
validation._ensure_no_complex_data = _ensure_no_complex_data
|
12 |
+
except AttributeError: # pragma: no cover
|
13 |
...
|
pysr/sr.py
CHANGED
@@ -25,7 +25,7 @@ from sklearn.utils import check_array, check_consistent_length, check_random_sta
|
|
25 |
from sklearn.utils.validation import _check_feature_names_in, check_is_fitted
|
26 |
|
27 |
from .denoising import denoise, multi_denoise
|
28 |
-
from .deprecated import
|
29 |
from .export_jax import sympy2jax
|
30 |
from .export_latex import sympy2latex, sympy2latextable, sympy2multilatextable
|
31 |
from .export_numpy import sympy2numpy
|
@@ -33,14 +33,14 @@ from .export_sympy import assert_valid_sympy_symbol, create_sympy_symbols, pysr2
|
|
33 |
from .export_torch import sympy2torch
|
34 |
from .feature_selection import run_feature_selection
|
35 |
from .julia_helpers import (
|
|
|
36 |
_escape_filename,
|
37 |
-
_load_backend,
|
38 |
_load_cluster_manager,
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
is_julia_version_greater_eq,
|
43 |
)
|
|
|
44 |
from .utils import (
|
45 |
_csv_filename_to_pkl_filename,
|
46 |
_preprocess_julia_floats,
|
@@ -48,8 +48,6 @@ from .utils import (
|
|
48 |
_subscriptify,
|
49 |
)
|
50 |
|
51 |
-
Main = None # TODO: Rename to more descriptive name like "julia_runtime"
|
52 |
-
|
53 |
already_ran = False
|
54 |
|
55 |
|
@@ -92,7 +90,6 @@ def _process_constraints(binary_operators, unary_operators, constraints):
|
|
92 |
def _maybe_create_inline_operators(
|
93 |
binary_operators, unary_operators, extra_sympy_mappings
|
94 |
):
|
95 |
-
global Main
|
96 |
binary_operators = binary_operators.copy()
|
97 |
unary_operators = unary_operators.copy()
|
98 |
for op_list in [binary_operators, unary_operators]:
|
@@ -100,7 +97,7 @@ def _maybe_create_inline_operators(
|
|
100 |
is_user_defined_operator = "(" in op
|
101 |
|
102 |
if is_user_defined_operator:
|
103 |
-
|
104 |
# Cut off from the first non-alphanumeric char:
|
105 |
first_non_char = [j for j, char in enumerate(op) if char == "("][0]
|
106 |
function_name = op[:first_non_char]
|
@@ -271,7 +268,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
271 |
arguments are treated the same way, and the max of each
|
272 |
argument is constrained.
|
273 |
Default is `None`.
|
274 |
-
|
275 |
String of Julia code specifying an elementwise loss function.
|
276 |
Can either be a loss from LossFunctions.jl, or your own loss
|
277 |
written as a function. Examples of custom written losses include:
|
@@ -287,11 +284,11 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
287 |
`ModifiedHuberLoss()`, `L2MarginLoss()`, `ExpLoss()`,
|
288 |
`SigmoidLoss()`, `DWDMarginLoss(q)`.
|
289 |
Default is `"L2DistLoss()"`.
|
290 |
-
|
291 |
Alternatively, you can specify the full objective function as
|
292 |
a snippet of Julia code, including any sort of custom evaluation
|
293 |
(including symbolic manipulations beforehand), and any sort
|
294 |
-
of loss function or regularizations. The default `
|
295 |
used in SymbolicRegression.jl is roughly equal to:
|
296 |
```julia
|
297 |
function eval_loss(tree, dataset::Dataset{T,L}, options)::L where {T,L}
|
@@ -357,7 +354,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
357 |
takes a loss and complexity as input, for example:
|
358 |
`"f(loss, complexity) = (loss < 0.1) && (complexity < 10)"`.
|
359 |
Default is `None`.
|
360 |
-
|
361 |
Number of total mutations to run, per 10 samples of the
|
362 |
population, per iteration.
|
363 |
Default is `550`.
|
@@ -401,7 +398,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
401 |
Constant optimization can also be performed as a mutation, in addition to
|
402 |
the normal strategy controlled by `optimize_probability` which happens
|
403 |
every iteration. Using it as a mutation is useful if you want to use
|
404 |
-
a large `
|
405 |
Default is `0.0`.
|
406 |
crossover_probability : float
|
407 |
Absolute probability of crossover-type genetic operation, instead of a mutation.
|
@@ -536,11 +533,6 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
536 |
delete_tempfiles : bool
|
537 |
Whether to delete the temporary files after finishing.
|
538 |
Default is `True`.
|
539 |
-
julia_project : str
|
540 |
-
A Julia environment location containing a Project.toml
|
541 |
-
(and potentially the source code for SymbolicRegression.jl).
|
542 |
-
Default gives the Python package directory, where a
|
543 |
-
Project.toml file should be present from the install.
|
544 |
update: bool
|
545 |
Whether to automatically update Julia packages when `fit` is called.
|
546 |
You should make sure that PySR is up-to-date itself first, as
|
@@ -585,11 +577,6 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
585 |
before passing to the symbolic regression code. None means no
|
586 |
feature selection; an int means select that many features.
|
587 |
Default is `None`.
|
588 |
-
julia_kwargs : dict
|
589 |
-
Keyword arguments to pass to `julia.core.Julia(...)` to initialize
|
590 |
-
the Julia runtime. The default, when `None`, is to set `threads` equal
|
591 |
-
to `procs`, and `optimize` to 3.
|
592 |
-
Default is `None`.
|
593 |
**kwargs : dict
|
594 |
Supports deprecated keyword arguments. Other arguments will
|
595 |
result in an error.
|
@@ -617,8 +604,15 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
617 |
Path to the temporary equations directory.
|
618 |
equation_file_ : str
|
619 |
Output equation file name produced by the julia backend.
|
620 |
-
|
621 |
-
The state for the julia SymbolicRegression.jl backend
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
622 |
equation_file_contents_ : list[pandas.DataFrame]
|
623 |
Contents of the equation file output by the Julia backend.
|
624 |
show_pickle_warnings_ : bool
|
@@ -643,7 +637,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
643 |
... "inv(x) = 1/x", # Custom operator (julia syntax)
|
644 |
... ],
|
645 |
... model_selection="best",
|
646 |
-
...
|
647 |
... )
|
648 |
>>> model.fit(X, y)
|
649 |
>>> model
|
@@ -681,8 +675,8 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
681 |
timeout_in_seconds: Optional[float] = None,
|
682 |
constraints: Optional[Dict[str, Union[int, Tuple[int, int]]]] = None,
|
683 |
nested_constraints: Optional[Dict[str, Dict[str, int]]] = None,
|
684 |
-
|
685 |
-
|
686 |
complexity_of_operators: Optional[Dict[str, Union[int, float]]] = None,
|
687 |
complexity_of_constants: Union[int, float] = 1,
|
688 |
complexity_of_variables: Union[int, float] = 1,
|
@@ -694,7 +688,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
694 |
alpha: float = 0.1,
|
695 |
annealing: bool = False,
|
696 |
early_stop_condition: Optional[Union[float, str]] = None,
|
697 |
-
|
698 |
fraction_replaced: float = 0.000364,
|
699 |
fraction_replaced_hof: float = 0.035,
|
700 |
weight_add_node: float = 0.79,
|
@@ -744,7 +738,6 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
744 |
temp_equation_file: bool = False,
|
745 |
tempdir: Optional[str] = None,
|
746 |
delete_tempfiles: bool = True,
|
747 |
-
julia_project: Optional[str] = None,
|
748 |
update: bool = False,
|
749 |
output_jax_format: bool = False,
|
750 |
output_torch_format: bool = False,
|
@@ -753,7 +746,6 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
753 |
extra_jax_mappings: Optional[Dict[Callable, str]] = None,
|
754 |
denoise: bool = False,
|
755 |
select_k_features: Optional[int] = None,
|
756 |
-
julia_kwargs: Optional[Dict] = None,
|
757 |
**kwargs,
|
758 |
):
|
759 |
# Hyperparameters
|
@@ -764,7 +756,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
764 |
self.niterations = niterations
|
765 |
self.populations = populations
|
766 |
self.population_size = population_size
|
767 |
-
self.
|
768 |
# - Equation Constraints
|
769 |
self.maxsize = maxsize
|
770 |
self.maxdepth = maxdepth
|
@@ -777,8 +769,8 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
777 |
self.timeout_in_seconds = timeout_in_seconds
|
778 |
self.early_stop_condition = early_stop_condition
|
779 |
# - Loss parameters
|
780 |
-
self.
|
781 |
-
self.
|
782 |
self.complexity_of_operators = complexity_of_operators
|
783 |
self.complexity_of_constants = complexity_of_constants
|
784 |
self.complexity_of_variables = complexity_of_variables
|
@@ -844,7 +836,6 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
844 |
self.temp_equation_file = temp_equation_file
|
845 |
self.tempdir = tempdir
|
846 |
self.delete_tempfiles = delete_tempfiles
|
847 |
-
self.julia_project = julia_project
|
848 |
self.update = update
|
849 |
self.output_jax_format = output_jax_format
|
850 |
self.output_torch_format = output_torch_format
|
@@ -854,16 +845,14 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
854 |
# Pre-modelling transformation
|
855 |
self.denoise = denoise
|
856 |
self.select_k_features = select_k_features
|
857 |
-
self.julia_kwargs = julia_kwargs
|
858 |
|
859 |
# Once all valid parameters have been assigned handle the
|
860 |
# deprecated kwargs
|
861 |
if len(kwargs) > 0: # pragma: no cover
|
862 |
-
deprecated_kwargs = make_deprecated_kwargs_for_pysr_regressor()
|
863 |
for k, v in kwargs.items():
|
864 |
# Handle renamed kwargs
|
865 |
-
if k in
|
866 |
-
updated_kwarg_name =
|
867 |
setattr(self, updated_kwarg_name, v)
|
868 |
warnings.warn(
|
869 |
f"{k} has been renamed to {updated_kwarg_name} in PySRRegressor. "
|
@@ -877,6 +866,19 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
877 |
f"Ignoring parameter; please pass {k} during the call to fit instead.",
|
878 |
FutureWarning,
|
879 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
880 |
else:
|
881 |
raise TypeError(
|
882 |
f"{k} is not a valid keyword argument for PySRRegressor."
|
@@ -1051,7 +1053,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
1051 |
serialization.
|
1052 |
|
1053 |
Thus, for `PySRRegressor` to support pickle serialization, the
|
1054 |
-
`
|
1055 |
prevent the `warm_start` of any model that is loaded via `pickle.loads()`,
|
1056 |
but does allow all other attributes of a fitted `PySRRegressor` estimator
|
1057 |
to be serialized. Note: Jax and Torch format equations are also removed
|
@@ -1061,12 +1063,6 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
1061 |
show_pickle_warning = not (
|
1062 |
"show_pickle_warnings_" in state and not state["show_pickle_warnings_"]
|
1063 |
)
|
1064 |
-
if "raw_julia_state_" in state and show_pickle_warning:
|
1065 |
-
warnings.warn(
|
1066 |
-
"raw_julia_state_ cannot be pickled and will be removed from the "
|
1067 |
-
"serialized instance. This will prevent a `warm_start` fit of any "
|
1068 |
-
"model that is deserialized via `pickle.load()`."
|
1069 |
-
)
|
1070 |
state_keys_containing_lambdas = ["extra_sympy_mappings", "extra_torch_mappings"]
|
1071 |
for state_key in state_keys_containing_lambdas:
|
1072 |
if state[state_key] is not None and show_pickle_warning:
|
@@ -1075,7 +1071,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
1075 |
"serialized instance. When loading the model, please redefine "
|
1076 |
f"`{state_key}` at runtime."
|
1077 |
)
|
1078 |
-
state_keys_to_clear =
|
1079 |
pickled_state = {
|
1080 |
key: (None if key in state_keys_to_clear else value)
|
1081 |
for key, value in state.items()
|
@@ -1125,6 +1121,24 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
1125 |
)
|
1126 |
return self.equations_
|
1127 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1128 |
def get_best(self, index=None):
|
1129 |
"""
|
1130 |
Get best equation using `model_selection`.
|
@@ -1238,8 +1252,10 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
1238 |
"to True and `procs` to 0 will result in non-deterministic searches. "
|
1239 |
)
|
1240 |
|
1241 |
-
if self.
|
1242 |
-
raise ValueError(
|
|
|
|
|
1243 |
|
1244 |
# NotImplementedError - Values that could be supported at a later time
|
1245 |
if self.optimizer_algorithm not in VALID_OPTIMIZER_ALGORITHMS:
|
@@ -1291,16 +1307,6 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
1291 |
> 0
|
1292 |
)
|
1293 |
|
1294 |
-
julia_kwargs = {}
|
1295 |
-
if self.julia_kwargs is not None:
|
1296 |
-
for key, value in self.julia_kwargs.items():
|
1297 |
-
julia_kwargs[key] = value
|
1298 |
-
if "optimize" not in julia_kwargs:
|
1299 |
-
julia_kwargs["optimize"] = 3
|
1300 |
-
if "threads" not in julia_kwargs and packed_modified_params["multithreading"]:
|
1301 |
-
julia_kwargs["threads"] = self.procs
|
1302 |
-
packed_modified_params["julia_kwargs"] = julia_kwargs
|
1303 |
-
|
1304 |
return packed_modified_params
|
1305 |
|
1306 |
def _validate_and_set_fit_params(
|
@@ -1528,7 +1534,6 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
1528 |
# Need to be global as we don't want to recreate/reinstate julia for
|
1529 |
# every new instance of PySRRegressor
|
1530 |
global already_ran
|
1531 |
-
global Main
|
1532 |
|
1533 |
# These are the parameters which may be modified from the ones
|
1534 |
# specified in init, so we define them here locally:
|
@@ -1543,32 +1548,13 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
1543 |
batch_size = mutated_params["batch_size"]
|
1544 |
update_verbosity = mutated_params["update_verbosity"]
|
1545 |
progress = mutated_params["progress"]
|
1546 |
-
julia_kwargs = mutated_params["julia_kwargs"]
|
1547 |
|
1548 |
# Start julia backend processes
|
1549 |
if not already_ran and update_verbosity != 0:
|
1550 |
print("Compiling Julia backend...")
|
1551 |
|
1552 |
-
Main = init_julia(self.julia_project, julia_kwargs=julia_kwargs)
|
1553 |
-
|
1554 |
if cluster_manager is not None:
|
1555 |
-
cluster_manager = _load_cluster_manager(
|
1556 |
-
|
1557 |
-
if self.update:
|
1558 |
-
_, is_shared = _process_julia_project(self.julia_project)
|
1559 |
-
io = "devnull" if update_verbosity == 0 else "stderr"
|
1560 |
-
io_arg = (
|
1561 |
-
f"io={io}" if is_julia_version_greater_eq(version=(1, 6, 0)) else ""
|
1562 |
-
)
|
1563 |
-
_update_julia_project(Main, is_shared, io_arg)
|
1564 |
-
|
1565 |
-
SymbolicRegression = _load_backend(Main)
|
1566 |
-
|
1567 |
-
Main.plus = Main.eval("(+)")
|
1568 |
-
Main.sub = Main.eval("(-)")
|
1569 |
-
Main.mult = Main.eval("(*)")
|
1570 |
-
Main.pow = Main.eval("(^)")
|
1571 |
-
Main.div = Main.eval("(/)")
|
1572 |
|
1573 |
# TODO(mcranmer): These functions should be part of this class.
|
1574 |
binary_operators, unary_operators = _maybe_create_inline_operators(
|
@@ -1594,7 +1580,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
1594 |
nested_constraints_str += f"({inner_k}) => {inner_v}, "
|
1595 |
nested_constraints_str += "), "
|
1596 |
nested_constraints_str += ")"
|
1597 |
-
nested_constraints =
|
1598 |
|
1599 |
# Parse dict into Julia Dict for complexities:
|
1600 |
if complexity_of_operators is not None:
|
@@ -1602,13 +1588,21 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
1602 |
for k, v in complexity_of_operators.items():
|
1603 |
complexity_of_operators_str += f"({k}) => {v}, "
|
1604 |
complexity_of_operators_str += ")"
|
1605 |
-
complexity_of_operators =
|
1606 |
|
1607 |
-
custom_loss =
|
1608 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
1609 |
|
1610 |
-
early_stop_condition =
|
1611 |
-
str(self.early_stop_condition)
|
|
|
|
|
1612 |
)
|
1613 |
|
1614 |
mutation_weights = SymbolicRegression.MutationWeights(
|
@@ -1627,10 +1621,10 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
1627 |
# Call to Julia backend.
|
1628 |
# See https://github.com/MilesCranmer/SymbolicRegression.jl/blob/master/src/OptionsStruct.jl
|
1629 |
options = SymbolicRegression.Options(
|
1630 |
-
binary_operators=
|
1631 |
-
unary_operators=
|
1632 |
-
bin_constraints=bin_constraints,
|
1633 |
-
una_constraints=una_constraints,
|
1634 |
complexity_of_operators=complexity_of_operators,
|
1635 |
complexity_of_constants=self.complexity_of_constants,
|
1636 |
complexity_of_variables=self.complexity_of_variables,
|
@@ -1665,7 +1659,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
1665 |
use_frequency_in_tournament=self.use_frequency_in_tournament,
|
1666 |
adaptive_parsimony_scaling=self.adaptive_parsimony_scaling,
|
1667 |
npop=self.population_size,
|
1668 |
-
ncycles_per_iteration=self.
|
1669 |
fraction_replaced=self.fraction_replaced,
|
1670 |
topn=self.topn,
|
1671 |
print_precision=self.print_precision,
|
@@ -1685,6 +1679,8 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
1685 |
define_helper_functions=False,
|
1686 |
)
|
1687 |
|
|
|
|
|
1688 |
# Convert data to desired precision
|
1689 |
test_X = np.array(X)
|
1690 |
is_complex = np.issubdtype(test_X.dtype, np.complexfloating)
|
@@ -1695,18 +1691,18 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
1695 |
np_dtype = {32: np.complex64, 64: np.complex128}[self.precision]
|
1696 |
|
1697 |
# This converts the data into a Julia array:
|
1698 |
-
|
1699 |
if len(y.shape) == 1:
|
1700 |
-
|
1701 |
else:
|
1702 |
-
|
1703 |
if weights is not None:
|
1704 |
if len(weights.shape) == 1:
|
1705 |
-
|
1706 |
else:
|
1707 |
-
|
1708 |
else:
|
1709 |
-
|
1710 |
|
1711 |
if self.procs == 0 and not multithreading:
|
1712 |
parallelism = "serial"
|
@@ -1719,34 +1715,41 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
1719 |
None if parallelism in ["serial", "multithreading"] else int(self.procs)
|
1720 |
)
|
1721 |
|
1722 |
-
y_variable_names = None
|
1723 |
if len(y.shape) > 1:
|
1724 |
# We set these manually so that they respect Python's 0 indexing
|
1725 |
# (by default Julia will use y1, y2...)
|
1726 |
-
|
|
|
|
|
|
|
|
|
1727 |
|
1728 |
-
|
1729 |
-
|
1730 |
-
|
1731 |
-
|
1732 |
-
|
1733 |
-
weights=Main.weights,
|
1734 |
niterations=int(self.niterations),
|
1735 |
-
variable_names=self.feature_names_in_
|
1736 |
-
display_variable_names=
|
1737 |
-
|
1738 |
-
|
1739 |
-
|
|
|
|
|
1740 |
options=options,
|
1741 |
numprocs=cprocs,
|
1742 |
parallelism=parallelism,
|
1743 |
-
saved_state=self.
|
1744 |
return_state=True,
|
1745 |
addprocs_function=cluster_manager,
|
1746 |
heap_size_hint_in_bytes=self.heap_size_hint_in_bytes,
|
1747 |
progress=progress and self.verbosity > 0 and len(y.shape) == 1,
|
1748 |
verbosity=int(self.verbosity),
|
1749 |
)
|
|
|
|
|
|
|
1750 |
|
1751 |
# Set attributes
|
1752 |
self.equations_ = self.get_hof()
|
@@ -1810,10 +1813,10 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
1810 |
Fitted estimator.
|
1811 |
"""
|
1812 |
# Init attributes that are not specified in BaseEstimator
|
1813 |
-
if self.warm_start and hasattr(self, "
|
1814 |
pass
|
1815 |
else:
|
1816 |
-
if hasattr(self, "
|
1817 |
warnings.warn(
|
1818 |
"The discovered expressions are being reset. "
|
1819 |
"Please set `warm_start=True` if you wish to continue "
|
@@ -1823,7 +1826,8 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
1823 |
self.equations_ = None
|
1824 |
self.nout_ = 1
|
1825 |
self.selection_mask_ = None
|
1826 |
-
self.
|
|
|
1827 |
self.X_units_ = None
|
1828 |
self.y_units_ = None
|
1829 |
|
|
|
25 |
from sklearn.utils.validation import _check_feature_names_in, check_is_fitted
|
26 |
|
27 |
from .denoising import denoise, multi_denoise
|
28 |
+
from .deprecated import DEPRECATED_KWARGS
|
29 |
from .export_jax import sympy2jax
|
30 |
from .export_latex import sympy2latex, sympy2latextable, sympy2multilatextable
|
31 |
from .export_numpy import sympy2numpy
|
|
|
33 |
from .export_torch import sympy2torch
|
34 |
from .feature_selection import run_feature_selection
|
35 |
from .julia_helpers import (
|
36 |
+
PythonCall,
|
37 |
_escape_filename,
|
|
|
38 |
_load_cluster_manager,
|
39 |
+
jl_array,
|
40 |
+
jl_deserialize,
|
41 |
+
jl_serialize,
|
|
|
42 |
)
|
43 |
+
from .julia_import import SymbolicRegression, jl
|
44 |
from .utils import (
|
45 |
_csv_filename_to_pkl_filename,
|
46 |
_preprocess_julia_floats,
|
|
|
48 |
_subscriptify,
|
49 |
)
|
50 |
|
|
|
|
|
51 |
already_ran = False
|
52 |
|
53 |
|
|
|
90 |
def _maybe_create_inline_operators(
|
91 |
binary_operators, unary_operators, extra_sympy_mappings
|
92 |
):
|
|
|
93 |
binary_operators = binary_operators.copy()
|
94 |
unary_operators = unary_operators.copy()
|
95 |
for op_list in [binary_operators, unary_operators]:
|
|
|
97 |
is_user_defined_operator = "(" in op
|
98 |
|
99 |
if is_user_defined_operator:
|
100 |
+
jl.seval(op)
|
101 |
# Cut off from the first non-alphanumeric char:
|
102 |
first_non_char = [j for j, char in enumerate(op) if char == "("][0]
|
103 |
function_name = op[:first_non_char]
|
|
|
268 |
arguments are treated the same way, and the max of each
|
269 |
argument is constrained.
|
270 |
Default is `None`.
|
271 |
+
elementwise_loss : str
|
272 |
String of Julia code specifying an elementwise loss function.
|
273 |
Can either be a loss from LossFunctions.jl, or your own loss
|
274 |
written as a function. Examples of custom written losses include:
|
|
|
284 |
`ModifiedHuberLoss()`, `L2MarginLoss()`, `ExpLoss()`,
|
285 |
`SigmoidLoss()`, `DWDMarginLoss(q)`.
|
286 |
Default is `"L2DistLoss()"`.
|
287 |
+
loss_function : str
|
288 |
Alternatively, you can specify the full objective function as
|
289 |
a snippet of Julia code, including any sort of custom evaluation
|
290 |
(including symbolic manipulations beforehand), and any sort
|
291 |
+
of loss function or regularizations. The default `loss_function`
|
292 |
used in SymbolicRegression.jl is roughly equal to:
|
293 |
```julia
|
294 |
function eval_loss(tree, dataset::Dataset{T,L}, options)::L where {T,L}
|
|
|
354 |
takes a loss and complexity as input, for example:
|
355 |
`"f(loss, complexity) = (loss < 0.1) && (complexity < 10)"`.
|
356 |
Default is `None`.
|
357 |
+
ncycles_per_iteration : int
|
358 |
Number of total mutations to run, per 10 samples of the
|
359 |
population, per iteration.
|
360 |
Default is `550`.
|
|
|
398 |
Constant optimization can also be performed as a mutation, in addition to
|
399 |
the normal strategy controlled by `optimize_probability` which happens
|
400 |
every iteration. Using it as a mutation is useful if you want to use
|
401 |
+
a large `ncycles_periteration`, and may not optimize very often.
|
402 |
Default is `0.0`.
|
403 |
crossover_probability : float
|
404 |
Absolute probability of crossover-type genetic operation, instead of a mutation.
|
|
|
533 |
delete_tempfiles : bool
|
534 |
Whether to delete the temporary files after finishing.
|
535 |
Default is `True`.
|
|
|
|
|
|
|
|
|
|
|
536 |
update: bool
|
537 |
Whether to automatically update Julia packages when `fit` is called.
|
538 |
You should make sure that PySR is up-to-date itself first, as
|
|
|
577 |
before passing to the symbolic regression code. None means no
|
578 |
feature selection; an int means select that many features.
|
579 |
Default is `None`.
|
|
|
|
|
|
|
|
|
|
|
580 |
**kwargs : dict
|
581 |
Supports deprecated keyword arguments. Other arguments will
|
582 |
result in an error.
|
|
|
604 |
Path to the temporary equations directory.
|
605 |
equation_file_ : str
|
606 |
Output equation file name produced by the julia backend.
|
607 |
+
julia_state_stream_ : ndarray
|
608 |
+
The serialized state for the julia SymbolicRegression.jl backend (after fitting),
|
609 |
+
stored as an array of uint8, produced by Julia's Serialization.serialize function.
|
610 |
+
julia_state_
|
611 |
+
The deserialized state.
|
612 |
+
julia_options_stream_ : ndarray
|
613 |
+
The serialized julia options, stored as an array of uint8,
|
614 |
+
julia_options_
|
615 |
+
The deserialized julia options.
|
616 |
equation_file_contents_ : list[pandas.DataFrame]
|
617 |
Contents of the equation file output by the Julia backend.
|
618 |
show_pickle_warnings_ : bool
|
|
|
637 |
... "inv(x) = 1/x", # Custom operator (julia syntax)
|
638 |
... ],
|
639 |
... model_selection="best",
|
640 |
+
... elementwise_loss="loss(x, y) = (x - y)^2", # Custom loss function (julia syntax)
|
641 |
... )
|
642 |
>>> model.fit(X, y)
|
643 |
>>> model
|
|
|
675 |
timeout_in_seconds: Optional[float] = None,
|
676 |
constraints: Optional[Dict[str, Union[int, Tuple[int, int]]]] = None,
|
677 |
nested_constraints: Optional[Dict[str, Dict[str, int]]] = None,
|
678 |
+
elementwise_loss: Optional[str] = None,
|
679 |
+
loss_function: Optional[str] = None,
|
680 |
complexity_of_operators: Optional[Dict[str, Union[int, float]]] = None,
|
681 |
complexity_of_constants: Union[int, float] = 1,
|
682 |
complexity_of_variables: Union[int, float] = 1,
|
|
|
688 |
alpha: float = 0.1,
|
689 |
annealing: bool = False,
|
690 |
early_stop_condition: Optional[Union[float, str]] = None,
|
691 |
+
ncycles_per_iteration: int = 550,
|
692 |
fraction_replaced: float = 0.000364,
|
693 |
fraction_replaced_hof: float = 0.035,
|
694 |
weight_add_node: float = 0.79,
|
|
|
738 |
temp_equation_file: bool = False,
|
739 |
tempdir: Optional[str] = None,
|
740 |
delete_tempfiles: bool = True,
|
|
|
741 |
update: bool = False,
|
742 |
output_jax_format: bool = False,
|
743 |
output_torch_format: bool = False,
|
|
|
746 |
extra_jax_mappings: Optional[Dict[Callable, str]] = None,
|
747 |
denoise: bool = False,
|
748 |
select_k_features: Optional[int] = None,
|
|
|
749 |
**kwargs,
|
750 |
):
|
751 |
# Hyperparameters
|
|
|
756 |
self.niterations = niterations
|
757 |
self.populations = populations
|
758 |
self.population_size = population_size
|
759 |
+
self.ncycles_per_iteration = ncycles_per_iteration
|
760 |
# - Equation Constraints
|
761 |
self.maxsize = maxsize
|
762 |
self.maxdepth = maxdepth
|
|
|
769 |
self.timeout_in_seconds = timeout_in_seconds
|
770 |
self.early_stop_condition = early_stop_condition
|
771 |
# - Loss parameters
|
772 |
+
self.elementwise_loss = elementwise_loss
|
773 |
+
self.loss_function = loss_function
|
774 |
self.complexity_of_operators = complexity_of_operators
|
775 |
self.complexity_of_constants = complexity_of_constants
|
776 |
self.complexity_of_variables = complexity_of_variables
|
|
|
836 |
self.temp_equation_file = temp_equation_file
|
837 |
self.tempdir = tempdir
|
838 |
self.delete_tempfiles = delete_tempfiles
|
|
|
839 |
self.update = update
|
840 |
self.output_jax_format = output_jax_format
|
841 |
self.output_torch_format = output_torch_format
|
|
|
845 |
# Pre-modelling transformation
|
846 |
self.denoise = denoise
|
847 |
self.select_k_features = select_k_features
|
|
|
848 |
|
849 |
# Once all valid parameters have been assigned handle the
|
850 |
# deprecated kwargs
|
851 |
if len(kwargs) > 0: # pragma: no cover
|
|
|
852 |
for k, v in kwargs.items():
|
853 |
# Handle renamed kwargs
|
854 |
+
if k in DEPRECATED_KWARGS:
|
855 |
+
updated_kwarg_name = DEPRECATED_KWARGS[k]
|
856 |
setattr(self, updated_kwarg_name, v)
|
857 |
warnings.warn(
|
858 |
f"{k} has been renamed to {updated_kwarg_name} in PySRRegressor. "
|
|
|
866 |
f"Ignoring parameter; please pass {k} during the call to fit instead.",
|
867 |
FutureWarning,
|
868 |
)
|
869 |
+
elif k == "julia_project":
|
870 |
+
warnings.warn(
|
871 |
+
"The `julia_project` parameter has been deprecated. To use a custom "
|
872 |
+
"julia project, please see `https://astroautomata.com/PySR/backend`.",
|
873 |
+
FutureWarning,
|
874 |
+
)
|
875 |
+
elif k == "julia_kwargs":
|
876 |
+
warnings.warn(
|
877 |
+
"The `julia_kwargs` parameter has been deprecated. To pass custom "
|
878 |
+
"keyword arguments to the julia backend, you should use environment variables. "
|
879 |
+
"See the Julia documentation for more information.",
|
880 |
+
FutureWarning,
|
881 |
+
)
|
882 |
else:
|
883 |
raise TypeError(
|
884 |
f"{k} is not a valid keyword argument for PySRRegressor."
|
|
|
1053 |
serialization.
|
1054 |
|
1055 |
Thus, for `PySRRegressor` to support pickle serialization, the
|
1056 |
+
`julia_state_stream_` attribute must be hidden from pickle. This will
|
1057 |
prevent the `warm_start` of any model that is loaded via `pickle.loads()`,
|
1058 |
but does allow all other attributes of a fitted `PySRRegressor` estimator
|
1059 |
to be serialized. Note: Jax and Torch format equations are also removed
|
|
|
1063 |
show_pickle_warning = not (
|
1064 |
"show_pickle_warnings_" in state and not state["show_pickle_warnings_"]
|
1065 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
1066 |
state_keys_containing_lambdas = ["extra_sympy_mappings", "extra_torch_mappings"]
|
1067 |
for state_key in state_keys_containing_lambdas:
|
1068 |
if state[state_key] is not None and show_pickle_warning:
|
|
|
1071 |
"serialized instance. When loading the model, please redefine "
|
1072 |
f"`{state_key}` at runtime."
|
1073 |
)
|
1074 |
+
state_keys_to_clear = state_keys_containing_lambdas
|
1075 |
pickled_state = {
|
1076 |
key: (None if key in state_keys_to_clear else value)
|
1077 |
for key, value in state.items()
|
|
|
1121 |
)
|
1122 |
return self.equations_
|
1123 |
|
1124 |
+
@property
|
1125 |
+
def julia_options_(self):
|
1126 |
+
return jl_deserialize(self.julia_options_stream_)
|
1127 |
+
|
1128 |
+
@property
|
1129 |
+
def julia_state_(self):
|
1130 |
+
return jl_deserialize(self.julia_state_stream_)
|
1131 |
+
|
1132 |
+
@property
|
1133 |
+
def raw_julia_state_(self):
|
1134 |
+
warnings.warn(
|
1135 |
+
"PySRRegressor.raw_julia_state_ is now deprecated. "
|
1136 |
+
"Please use PySRRegressor.julia_state_ instead, or julia_state_stream_ "
|
1137 |
+
"for the raw stream of bytes.",
|
1138 |
+
FutureWarning,
|
1139 |
+
)
|
1140 |
+
return self.julia_state_
|
1141 |
+
|
1142 |
def get_best(self, index=None):
|
1143 |
"""
|
1144 |
Get best equation using `model_selection`.
|
|
|
1252 |
"to True and `procs` to 0 will result in non-deterministic searches. "
|
1253 |
)
|
1254 |
|
1255 |
+
if self.elementwise_loss is not None and self.loss_function is not None:
|
1256 |
+
raise ValueError(
|
1257 |
+
"You cannot set both `elementwise_loss` and `loss_function`."
|
1258 |
+
)
|
1259 |
|
1260 |
# NotImplementedError - Values that could be supported at a later time
|
1261 |
if self.optimizer_algorithm not in VALID_OPTIMIZER_ALGORITHMS:
|
|
|
1307 |
> 0
|
1308 |
)
|
1309 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1310 |
return packed_modified_params
|
1311 |
|
1312 |
def _validate_and_set_fit_params(
|
|
|
1534 |
# Need to be global as we don't want to recreate/reinstate julia for
|
1535 |
# every new instance of PySRRegressor
|
1536 |
global already_ran
|
|
|
1537 |
|
1538 |
# These are the parameters which may be modified from the ones
|
1539 |
# specified in init, so we define them here locally:
|
|
|
1548 |
batch_size = mutated_params["batch_size"]
|
1549 |
update_verbosity = mutated_params["update_verbosity"]
|
1550 |
progress = mutated_params["progress"]
|
|
|
1551 |
|
1552 |
# Start julia backend processes
|
1553 |
if not already_ran and update_verbosity != 0:
|
1554 |
print("Compiling Julia backend...")
|
1555 |
|
|
|
|
|
1556 |
if cluster_manager is not None:
|
1557 |
+
cluster_manager = _load_cluster_manager(cluster_manager)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1558 |
|
1559 |
# TODO(mcranmer): These functions should be part of this class.
|
1560 |
binary_operators, unary_operators = _maybe_create_inline_operators(
|
|
|
1580 |
nested_constraints_str += f"({inner_k}) => {inner_v}, "
|
1581 |
nested_constraints_str += "), "
|
1582 |
nested_constraints_str += ")"
|
1583 |
+
nested_constraints = jl.seval(nested_constraints_str)
|
1584 |
|
1585 |
# Parse dict into Julia Dict for complexities:
|
1586 |
if complexity_of_operators is not None:
|
|
|
1588 |
for k, v in complexity_of_operators.items():
|
1589 |
complexity_of_operators_str += f"({k}) => {v}, "
|
1590 |
complexity_of_operators_str += ")"
|
1591 |
+
complexity_of_operators = jl.seval(complexity_of_operators_str)
|
1592 |
|
1593 |
+
custom_loss = jl.seval(
|
1594 |
+
str(self.elementwise_loss)
|
1595 |
+
if self.elementwise_loss is not None
|
1596 |
+
else "nothing"
|
1597 |
+
)
|
1598 |
+
custom_full_objective = jl.seval(
|
1599 |
+
str(self.loss_function) if self.loss_function is not None else "nothing"
|
1600 |
+
)
|
1601 |
|
1602 |
+
early_stop_condition = jl.seval(
|
1603 |
+
str(self.early_stop_condition)
|
1604 |
+
if self.early_stop_condition is not None
|
1605 |
+
else "nothing"
|
1606 |
)
|
1607 |
|
1608 |
mutation_weights = SymbolicRegression.MutationWeights(
|
|
|
1621 |
# Call to Julia backend.
|
1622 |
# See https://github.com/MilesCranmer/SymbolicRegression.jl/blob/master/src/OptionsStruct.jl
|
1623 |
options = SymbolicRegression.Options(
|
1624 |
+
binary_operators=jl.seval(str(binary_operators).replace("'", "")),
|
1625 |
+
unary_operators=jl.seval(str(unary_operators).replace("'", "")),
|
1626 |
+
bin_constraints=jl_array(bin_constraints),
|
1627 |
+
una_constraints=jl_array(una_constraints),
|
1628 |
complexity_of_operators=complexity_of_operators,
|
1629 |
complexity_of_constants=self.complexity_of_constants,
|
1630 |
complexity_of_variables=self.complexity_of_variables,
|
|
|
1659 |
use_frequency_in_tournament=self.use_frequency_in_tournament,
|
1660 |
adaptive_parsimony_scaling=self.adaptive_parsimony_scaling,
|
1661 |
npop=self.population_size,
|
1662 |
+
ncycles_per_iteration=self.ncycles_per_iteration,
|
1663 |
fraction_replaced=self.fraction_replaced,
|
1664 |
topn=self.topn,
|
1665 |
print_precision=self.print_precision,
|
|
|
1679 |
define_helper_functions=False,
|
1680 |
)
|
1681 |
|
1682 |
+
self.julia_options_stream_ = jl_serialize(options)
|
1683 |
+
|
1684 |
# Convert data to desired precision
|
1685 |
test_X = np.array(X)
|
1686 |
is_complex = np.issubdtype(test_X.dtype, np.complexfloating)
|
|
|
1691 |
np_dtype = {32: np.complex64, 64: np.complex128}[self.precision]
|
1692 |
|
1693 |
# This converts the data into a Julia array:
|
1694 |
+
jl_X = jl_array(np.array(X, dtype=np_dtype).T)
|
1695 |
if len(y.shape) == 1:
|
1696 |
+
jl_y = jl_array(np.array(y, dtype=np_dtype))
|
1697 |
else:
|
1698 |
+
jl_y = jl_array(np.array(y, dtype=np_dtype).T)
|
1699 |
if weights is not None:
|
1700 |
if len(weights.shape) == 1:
|
1701 |
+
jl_weights = jl_array(np.array(weights, dtype=np_dtype))
|
1702 |
else:
|
1703 |
+
jl_weights = jl_array(np.array(weights, dtype=np_dtype).T)
|
1704 |
else:
|
1705 |
+
jl_weights = None
|
1706 |
|
1707 |
if self.procs == 0 and not multithreading:
|
1708 |
parallelism = "serial"
|
|
|
1715 |
None if parallelism in ["serial", "multithreading"] else int(self.procs)
|
1716 |
)
|
1717 |
|
|
|
1718 |
if len(y.shape) > 1:
|
1719 |
# We set these manually so that they respect Python's 0 indexing
|
1720 |
# (by default Julia will use y1, y2...)
|
1721 |
+
jl_y_variable_names = jl_array(
|
1722 |
+
[f"y{_subscriptify(i)}" for i in range(y.shape[1])]
|
1723 |
+
)
|
1724 |
+
else:
|
1725 |
+
jl_y_variable_names = None
|
1726 |
|
1727 |
+
PythonCall.GC.disable()
|
1728 |
+
out = SymbolicRegression.equation_search(
|
1729 |
+
jl_X,
|
1730 |
+
jl_y,
|
1731 |
+
weights=jl_weights,
|
|
|
1732 |
niterations=int(self.niterations),
|
1733 |
+
variable_names=jl_array([str(v) for v in self.feature_names_in_]),
|
1734 |
+
display_variable_names=jl_array(
|
1735 |
+
[str(v) for v in self.display_feature_names_in_]
|
1736 |
+
),
|
1737 |
+
y_variable_names=jl_y_variable_names,
|
1738 |
+
X_units=jl_array(self.X_units_),
|
1739 |
+
y_units=jl_array(self.y_units_),
|
1740 |
options=options,
|
1741 |
numprocs=cprocs,
|
1742 |
parallelism=parallelism,
|
1743 |
+
saved_state=self.julia_state_,
|
1744 |
return_state=True,
|
1745 |
addprocs_function=cluster_manager,
|
1746 |
heap_size_hint_in_bytes=self.heap_size_hint_in_bytes,
|
1747 |
progress=progress and self.verbosity > 0 and len(y.shape) == 1,
|
1748 |
verbosity=int(self.verbosity),
|
1749 |
)
|
1750 |
+
PythonCall.GC.enable()
|
1751 |
+
|
1752 |
+
self.julia_state_stream_ = jl_serialize(out)
|
1753 |
|
1754 |
# Set attributes
|
1755 |
self.equations_ = self.get_hof()
|
|
|
1813 |
Fitted estimator.
|
1814 |
"""
|
1815 |
# Init attributes that are not specified in BaseEstimator
|
1816 |
+
if self.warm_start and hasattr(self, "julia_state_stream_"):
|
1817 |
pass
|
1818 |
else:
|
1819 |
+
if hasattr(self, "julia_state_stream_"):
|
1820 |
warnings.warn(
|
1821 |
"The discovered expressions are being reset. "
|
1822 |
"Please set `warm_start=True` if you wish to continue "
|
|
|
1826 |
self.equations_ = None
|
1827 |
self.nout_ = 1
|
1828 |
self.selection_mask_ = None
|
1829 |
+
self.julia_state_stream_ = None
|
1830 |
+
self.julia_options_stream_ = None
|
1831 |
self.X_units_ = None
|
1832 |
self.y_units_ = None
|
1833 |
|
pysr/test/__init__.py
CHANGED
@@ -1,7 +1,15 @@
|
|
1 |
from .test import runtests
|
2 |
-
from .test_cli import
|
3 |
-
from .
|
4 |
from .test_jax import runtests as runtests_jax
|
|
|
5 |
from .test_torch import runtests as runtests_torch
|
6 |
|
7 |
-
__all__ = [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
from .test import runtests
|
2 |
+
from .test_cli import get_runtests as get_runtests_cli
|
3 |
+
from .test_dev import runtests as runtests_dev
|
4 |
from .test_jax import runtests as runtests_jax
|
5 |
+
from .test_startup import runtests as runtests_startup
|
6 |
from .test_torch import runtests as runtests_torch
|
7 |
|
8 |
+
__all__ = [
|
9 |
+
"runtests",
|
10 |
+
"runtests_jax",
|
11 |
+
"runtests_torch",
|
12 |
+
"get_runtests_cli",
|
13 |
+
"runtests_startup",
|
14 |
+
"runtests_dev",
|
15 |
+
]
|
pysr/test/__main__.py
CHANGED
@@ -1,43 +1,13 @@
|
|
1 |
"""CLI for running PySR's test suite."""
|
2 |
import argparse
|
3 |
-
import os
|
4 |
|
5 |
from . import *
|
6 |
|
7 |
if __name__ == "__main__":
|
8 |
# Get args:
|
9 |
parser = argparse.ArgumentParser()
|
10 |
-
parser.usage = "python -m pysr.test [tests...]"
|
11 |
parser.add_argument(
|
12 |
"test",
|
13 |
nargs="*",
|
14 |
-
help="
|
15 |
)
|
16 |
-
|
17 |
-
# Parse args:
|
18 |
-
args = parser.parse_args()
|
19 |
-
tests = args.test
|
20 |
-
|
21 |
-
if len(tests) == 0:
|
22 |
-
# Raise help message:
|
23 |
-
parser.print_help()
|
24 |
-
raise SystemExit(1)
|
25 |
-
|
26 |
-
# Run tests:
|
27 |
-
for test in tests:
|
28 |
-
if test in {"main", "env", "jax", "torch", "cli"}:
|
29 |
-
cur_dir = os.path.dirname(os.path.abspath(__file__))
|
30 |
-
print(f"Running test from {cur_dir}")
|
31 |
-
if test == "main":
|
32 |
-
runtests()
|
33 |
-
elif test == "env":
|
34 |
-
runtests_env()
|
35 |
-
elif test == "jax":
|
36 |
-
runtests_jax()
|
37 |
-
elif test == "torch":
|
38 |
-
runtests_torch()
|
39 |
-
elif test == "cli":
|
40 |
-
runtests_cli()
|
41 |
-
else:
|
42 |
-
parser.print_help()
|
43 |
-
raise SystemExit(1)
|
|
|
1 |
"""CLI for running PySR's test suite."""
|
2 |
import argparse
|
|
|
3 |
|
4 |
from . import *
|
5 |
|
6 |
if __name__ == "__main__":
|
7 |
# Get args:
|
8 |
parser = argparse.ArgumentParser()
|
|
|
9 |
parser.add_argument(
|
10 |
"test",
|
11 |
nargs="*",
|
12 |
+
help="DEPRECATED. Use `python -m pysr test [tests...]` instead.",
|
13 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
pysr/test/generate_dev_juliapkg.py
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Example call:
|
2 |
+
## python3 generate_dev_juliapkg.py /pysr/pysr/juliapkg.json /srjl
|
3 |
+
import json
|
4 |
+
import sys
|
5 |
+
|
6 |
+
juliapkg_json = sys.argv[1]
|
7 |
+
path_to_srjl = sys.argv[2]
|
8 |
+
|
9 |
+
with open(juliapkg_json, "r") as f:
|
10 |
+
juliapkg = json.load(f)
|
11 |
+
|
12 |
+
del juliapkg["packages"]["SymbolicRegression"]["version"]
|
13 |
+
juliapkg["packages"]["SymbolicRegression"]["path"] = path_to_srjl
|
14 |
+
juliapkg["packages"]["SymbolicRegression"]["dev"] = True
|
15 |
+
|
16 |
+
with open(juliapkg_json, "w") as f:
|
17 |
+
json.dump(juliapkg, f, indent=4)
|
pysr/test/incremental_install_simulator.dockerfile
DELETED
@@ -1,52 +0,0 @@
|
|
1 |
-
# This dockerfile simulates a user installation that first
|
2 |
-
# builds PySR for Python 3.9, and then upgrades to Python 3.10.
|
3 |
-
# Normally this would cause an error when installing PyCall, so we want to
|
4 |
-
# ensure that PySR can automatically patch things.
|
5 |
-
FROM debian:bullseye-slim
|
6 |
-
|
7 |
-
ENV DEBIAN_FRONTEND=noninteractive
|
8 |
-
|
9 |
-
# Install juliaup and pyenv:
|
10 |
-
RUN apt-get update && apt-get install -y curl git build-essential \
|
11 |
-
libssl-dev zlib1g-dev libbz2-dev libreadline-dev libsqlite3-dev \
|
12 |
-
libncurses5-dev libncursesw5-dev xz-utils libffi-dev liblzma-dev && \
|
13 |
-
apt-get clean && \
|
14 |
-
rm -rf /var/lib/apt/lists/*
|
15 |
-
|
16 |
-
# Install juliaup:
|
17 |
-
RUN curl -fsSL https://install.julialang.org | sh -s -- -y
|
18 |
-
|
19 |
-
# Install pyenv:
|
20 |
-
RUN curl -fsSL curl https://pyenv.run | sh && \
|
21 |
-
echo 'export PATH="/root/.pyenv/bin:$PATH"' >> ~/.bashrc && \
|
22 |
-
echo 'export PYENV_ROOT="$HOME/.pyenv"' >> ~/.bashrc && \
|
23 |
-
echo 'eval "$(pyenv init -)"' >> ~/.bashrc && \
|
24 |
-
echo 'eval "$(pyenv virtualenv-init -)"' >> ~/.bashrc
|
25 |
-
|
26 |
-
# Default to using bash -l:
|
27 |
-
SHELL ["/bin/bash", "-l", "-c"]
|
28 |
-
|
29 |
-
RUN juliaup add 1.8 && juliaup default 1.8
|
30 |
-
RUN pyenv install 3.9.2 && pyenv global 3.9.2
|
31 |
-
RUN python3 -m pip install --upgrade pip
|
32 |
-
|
33 |
-
# Get PySR source:
|
34 |
-
WORKDIR /pysr
|
35 |
-
ADD ./requirements.txt /pysr/requirements.txt
|
36 |
-
RUN python3 -m pip install -r /pysr/requirements.txt
|
37 |
-
|
38 |
-
ADD ./setup.py /pysr/setup.py
|
39 |
-
ADD ./pysr/ /pysr/pysr/
|
40 |
-
|
41 |
-
# First install of PySR:
|
42 |
-
RUN python3 -m pip install .
|
43 |
-
RUN python3 -m pysr install
|
44 |
-
|
45 |
-
# Change Python version:
|
46 |
-
RUN pyenv install 3.10 && pyenv global 3.10 && pyenv uninstall -f 3.9.2
|
47 |
-
RUN python3 -m pip install --upgrade pip
|
48 |
-
|
49 |
-
# Second install of PySR:
|
50 |
-
RUN python3 -m pip install .
|
51 |
-
RUN rm -r ~/.julia/environments/pysr-*
|
52 |
-
RUN python3 -m pysr install
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
pysr/test/nb_sanitize.cfg
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
[pathnames]
|
2 |
+
regex: /[a-zA-Z0-9_\- .\/]+/pysr/sr\.py
|
3 |
+
replace: PATH
|
pysr/test/params.py
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import inspect
|
2 |
+
|
3 |
+
from .. import PySRRegressor
|
4 |
+
|
5 |
+
DEFAULT_PARAMS = inspect.signature(PySRRegressor.__init__).parameters
|
6 |
+
DEFAULT_NITERATIONS = DEFAULT_PARAMS["niterations"].default
|
7 |
+
DEFAULT_POPULATIONS = DEFAULT_PARAMS["populations"].default
|
8 |
+
DEFAULT_NCYCLES = DEFAULT_PARAMS["ncycles_per_iteration"].default
|
pysr/test/test.py
CHANGED
@@ -1,4 +1,3 @@
|
|
1 |
-
import inspect
|
2 |
import os
|
3 |
import pickle as pkl
|
4 |
import tempfile
|
@@ -12,16 +11,18 @@ import pandas as pd
|
|
12 |
import sympy
|
13 |
from sklearn.utils.estimator_checks import check_estimator
|
14 |
|
15 |
-
from .. import PySRRegressor,
|
16 |
from ..export_latex import sympy2latex
|
17 |
from ..feature_selection import _handle_feature_selection, run_feature_selection
|
|
|
18 |
from ..sr import _check_assertions, _process_constraints, idx_model_selection
|
19 |
from ..utils import _csv_filename_to_pkl_filename
|
20 |
-
|
21 |
-
|
22 |
-
DEFAULT_NITERATIONS
|
23 |
-
|
24 |
-
|
|
|
25 |
|
26 |
|
27 |
class TestPipeline(unittest.TestCase):
|
@@ -80,7 +81,7 @@ class TestPipeline(unittest.TestCase):
|
|
80 |
multithreading=False,
|
81 |
turbo=True,
|
82 |
early_stop_condition="stop_if(loss, complexity) = loss < 1e-10 && complexity == 1",
|
83 |
-
|
84 |
function my_objective(tree::Node{T}, dataset::Dataset{T}, options::Options) where T
|
85 |
prediction, flag = eval_tree_array(tree, dataset.X, options)
|
86 |
!flag && return T(Inf)
|
@@ -95,22 +96,39 @@ class TestPipeline(unittest.TestCase):
|
|
95 |
self.assertLessEqual(best_loss, 1e-10)
|
96 |
self.assertGreaterEqual(best_loss, 0.0)
|
97 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
98 |
def test_high_precision_search_custom_loss(self):
|
99 |
y = 1.23456789 * self.X[:, 0]
|
100 |
model = PySRRegressor(
|
101 |
**self.default_test_kwargs,
|
102 |
early_stop_condition="stop_if(loss, complexity) = loss < 1e-4 && complexity == 3",
|
103 |
-
|
104 |
precision=64,
|
105 |
parsimony=0.01,
|
106 |
warm_start=True,
|
107 |
)
|
108 |
model.fit(self.X, y)
|
109 |
-
from pysr.sr import Main
|
110 |
|
111 |
# We should have that the model state is now a Float64 hof:
|
112 |
-
|
113 |
-
self.assertTrue(
|
|
|
|
|
|
|
114 |
|
115 |
def test_multioutput_custom_operator_quiet_custom_complexity(self):
|
116 |
y = self.X[:, [0, 1]] ** 2
|
@@ -199,6 +217,7 @@ class TestPipeline(unittest.TestCase):
|
|
199 |
**self.default_test_kwargs,
|
200 |
early_stop_condition="(loss, complexity) -> loss <= 1e-4 && complexity <= 6",
|
201 |
)
|
|
|
202 |
model.fit(X, y)
|
203 |
test_y = model.predict(X)
|
204 |
self.assertTrue(np.issubdtype(test_y.dtype, np.complexfloating))
|
@@ -224,16 +243,17 @@ class TestPipeline(unittest.TestCase):
|
|
224 |
# Test if repeated fit works:
|
225 |
regressor.set_params(
|
226 |
niterations=1,
|
227 |
-
|
228 |
warm_start=True,
|
229 |
early_stop_condition=None,
|
230 |
)
|
231 |
-
# Check that the the julia state is saved:
|
232 |
-
from pysr.sr import Main
|
233 |
|
234 |
# We should have that the model state is now a Float32 hof:
|
235 |
-
|
236 |
-
self.assertTrue(
|
|
|
|
|
|
|
237 |
# This should exit almost immediately, and use the old equations
|
238 |
regressor.fit(X, y)
|
239 |
|
@@ -548,6 +568,17 @@ class TestMiscellaneous(unittest.TestCase):
|
|
548 |
# The correct value should be set:
|
549 |
self.assertEqual(model.fraction_replaced, 0.2)
|
550 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
551 |
def test_power_law_warning(self):
|
552 |
"""Ensure that a warning is given for a power law operator."""
|
553 |
with self.assertWarns(UserWarning):
|
@@ -594,23 +625,6 @@ class TestMiscellaneous(unittest.TestCase):
|
|
594 |
with self.assertRaises(ValueError):
|
595 |
model.fit(X, y)
|
596 |
|
597 |
-
def test_changed_options_warning(self):
|
598 |
-
"""Check that a warning is given if Julia options are changed."""
|
599 |
-
if julia_helpers.julia_kwargs_at_initialization is None:
|
600 |
-
julia_helpers.init_julia(julia_kwargs={"threads": 2, "optimize": 3})
|
601 |
-
|
602 |
-
cur_init = julia_helpers.julia_kwargs_at_initialization
|
603 |
-
|
604 |
-
threads_to_change = cur_init["threads"] + 1
|
605 |
-
with warnings.catch_warnings():
|
606 |
-
warnings.simplefilter("error")
|
607 |
-
with self.assertRaises(Exception) as context:
|
608 |
-
julia_helpers.init_julia(
|
609 |
-
julia_kwargs={"threads": threads_to_change, "optimize": 3}
|
610 |
-
)
|
611 |
-
self.assertIn("Julia has already started", str(context.exception))
|
612 |
-
self.assertIn("threads", str(context.exception))
|
613 |
-
|
614 |
def test_extra_sympy_mappings_undefined(self):
|
615 |
"""extra_sympy_mappings=None errors for custom operators"""
|
616 |
model = PySRRegressor(unary_operators=["square2(x) = x^2"])
|
@@ -640,6 +654,50 @@ class TestMiscellaneous(unittest.TestCase):
|
|
640 |
model.fit(X, y, variable_names=["f{c}"])
|
641 |
self.assertIn("Invalid variable name", str(cm.exception))
|
642 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
643 |
def test_pickle_with_temp_equation_file(self):
|
644 |
"""If we have a temporary equation file, unpickle the estimator."""
|
645 |
model = PySRRegressor(
|
@@ -678,7 +736,7 @@ class TestMiscellaneous(unittest.TestCase):
|
|
678 |
model = PySRRegressor(
|
679 |
niterations=int(1 + DEFAULT_NITERATIONS / 10),
|
680 |
populations=int(1 + DEFAULT_POPULATIONS / 3),
|
681 |
-
|
682 |
verbosity=0,
|
683 |
progress=False,
|
684 |
random_state=0,
|
@@ -715,6 +773,9 @@ class TestMiscellaneous(unittest.TestCase):
|
|
715 |
def test_param_groupings(self):
|
716 |
"""Test that param_groupings are complete"""
|
717 |
param_groupings_file = Path(__file__).parent.parent / "param_groupings.yml"
|
|
|
|
|
|
|
718 |
# Read the file, discarding lines ending in ":",
|
719 |
# and removing leading "\s*-\s*":
|
720 |
params = []
|
@@ -1072,10 +1133,8 @@ class TestDimensionalConstraints(unittest.TestCase):
|
|
1072 |
# TODO: Determine desired behavior if second .fit() call does not have units
|
1073 |
|
1074 |
|
1075 |
-
def runtests():
|
1076 |
"""Run all tests in test.py."""
|
1077 |
-
suite = unittest.TestSuite()
|
1078 |
-
loader = unittest.TestLoader()
|
1079 |
test_cases = [
|
1080 |
TestPipeline,
|
1081 |
TestBest,
|
@@ -1084,8 +1143,11 @@ def runtests():
|
|
1084 |
TestLaTeXTable,
|
1085 |
TestDimensionalConstraints,
|
1086 |
]
|
|
|
|
|
|
|
|
|
1087 |
for test_case in test_cases:
|
1088 |
-
|
1089 |
-
suite.addTests(tests)
|
1090 |
runner = unittest.TextTestRunner()
|
1091 |
return runner.run(suite)
|
|
|
|
|
1 |
import os
|
2 |
import pickle as pkl
|
3 |
import tempfile
|
|
|
11 |
import sympy
|
12 |
from sklearn.utils.estimator_checks import check_estimator
|
13 |
|
14 |
+
from .. import PySRRegressor, install, jl
|
15 |
from ..export_latex import sympy2latex
|
16 |
from ..feature_selection import _handle_feature_selection, run_feature_selection
|
17 |
+
from ..julia_helpers import init_julia
|
18 |
from ..sr import _check_assertions, _process_constraints, idx_model_selection
|
19 |
from ..utils import _csv_filename_to_pkl_filename
|
20 |
+
from .params import (
|
21 |
+
DEFAULT_NCYCLES,
|
22 |
+
DEFAULT_NITERATIONS,
|
23 |
+
DEFAULT_PARAMS,
|
24 |
+
DEFAULT_POPULATIONS,
|
25 |
+
)
|
26 |
|
27 |
|
28 |
class TestPipeline(unittest.TestCase):
|
|
|
81 |
multithreading=False,
|
82 |
turbo=True,
|
83 |
early_stop_condition="stop_if(loss, complexity) = loss < 1e-10 && complexity == 1",
|
84 |
+
loss_function="""
|
85 |
function my_objective(tree::Node{T}, dataset::Dataset{T}, options::Options) where T
|
86 |
prediction, flag = eval_tree_array(tree, dataset.X, options)
|
87 |
!flag && return T(Inf)
|
|
|
96 |
self.assertLessEqual(best_loss, 1e-10)
|
97 |
self.assertGreaterEqual(best_loss, 0.0)
|
98 |
|
99 |
+
# Test options stored:
|
100 |
+
self.assertEqual(model.julia_options_.turbo, True)
|
101 |
+
|
102 |
+
def test_multiline_seval(self):
|
103 |
+
# The user should be able to run multiple things in a single seval call:
|
104 |
+
num = jl.seval(
|
105 |
+
"""
|
106 |
+
function my_new_objective(x)
|
107 |
+
x^2
|
108 |
+
end
|
109 |
+
1.5
|
110 |
+
"""
|
111 |
+
)
|
112 |
+
self.assertEqual(num, 1.5)
|
113 |
+
|
114 |
def test_high_precision_search_custom_loss(self):
|
115 |
y = 1.23456789 * self.X[:, 0]
|
116 |
model = PySRRegressor(
|
117 |
**self.default_test_kwargs,
|
118 |
early_stop_condition="stop_if(loss, complexity) = loss < 1e-4 && complexity == 3",
|
119 |
+
elementwise_loss="my_loss(prediction, target) = (prediction - target)^2",
|
120 |
precision=64,
|
121 |
parsimony=0.01,
|
122 |
warm_start=True,
|
123 |
)
|
124 |
model.fit(self.X, y)
|
|
|
125 |
|
126 |
# We should have that the model state is now a Float64 hof:
|
127 |
+
test_state = model.raw_julia_state_
|
128 |
+
self.assertTrue(jl.typeof(test_state[1]).parameters[1] == jl.Float64)
|
129 |
+
|
130 |
+
# Test options stored:
|
131 |
+
self.assertEqual(model.julia_options_.turbo, False)
|
132 |
|
133 |
def test_multioutput_custom_operator_quiet_custom_complexity(self):
|
134 |
y = self.X[:, [0, 1]] ** 2
|
|
|
217 |
**self.default_test_kwargs,
|
218 |
early_stop_condition="(loss, complexity) -> loss <= 1e-4 && complexity <= 6",
|
219 |
)
|
220 |
+
model.niterations = DEFAULT_NITERATIONS * 10
|
221 |
model.fit(X, y)
|
222 |
test_y = model.predict(X)
|
223 |
self.assertTrue(np.issubdtype(test_y.dtype, np.complexfloating))
|
|
|
243 |
# Test if repeated fit works:
|
244 |
regressor.set_params(
|
245 |
niterations=1,
|
246 |
+
ncycles_per_iteration=2,
|
247 |
warm_start=True,
|
248 |
early_stop_condition=None,
|
249 |
)
|
|
|
|
|
250 |
|
251 |
# We should have that the model state is now a Float32 hof:
|
252 |
+
test_state = regressor.julia_state_
|
253 |
+
self.assertTrue(
|
254 |
+
jl.first(jl.typeof(jl.last(test_state)).parameters) == jl.Float32
|
255 |
+
)
|
256 |
+
|
257 |
# This should exit almost immediately, and use the old equations
|
258 |
regressor.fit(X, y)
|
259 |
|
|
|
568 |
# The correct value should be set:
|
569 |
self.assertEqual(model.fraction_replaced, 0.2)
|
570 |
|
571 |
+
def test_deprecated_functions(self):
|
572 |
+
with self.assertWarns(FutureWarning):
|
573 |
+
install()
|
574 |
+
|
575 |
+
_jl = None
|
576 |
+
|
577 |
+
with self.assertWarns(FutureWarning):
|
578 |
+
_jl = init_julia()
|
579 |
+
|
580 |
+
self.assertEqual(_jl, jl)
|
581 |
+
|
582 |
def test_power_law_warning(self):
|
583 |
"""Ensure that a warning is given for a power law operator."""
|
584 |
with self.assertWarns(UserWarning):
|
|
|
625 |
with self.assertRaises(ValueError):
|
626 |
model.fit(X, y)
|
627 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
628 |
def test_extra_sympy_mappings_undefined(self):
|
629 |
"""extra_sympy_mappings=None errors for custom operators"""
|
630 |
model = PySRRegressor(unary_operators=["square2(x) = x^2"])
|
|
|
654 |
model.fit(X, y, variable_names=["f{c}"])
|
655 |
self.assertIn("Invalid variable name", str(cm.exception))
|
656 |
|
657 |
+
def test_bad_kwargs(self):
|
658 |
+
bad_kwargs = [
|
659 |
+
dict(
|
660 |
+
kwargs=dict(
|
661 |
+
elementwise_loss="g(x, y) = 0.0", loss_function="f(*args) = 0.0"
|
662 |
+
),
|
663 |
+
error=ValueError,
|
664 |
+
),
|
665 |
+
dict(
|
666 |
+
kwargs=dict(maxsize=3),
|
667 |
+
error=ValueError,
|
668 |
+
),
|
669 |
+
dict(
|
670 |
+
kwargs=dict(tournament_selection_n=10, population_size=3),
|
671 |
+
error=ValueError,
|
672 |
+
),
|
673 |
+
dict(
|
674 |
+
kwargs=dict(optimizer_algorithm="COBYLA"),
|
675 |
+
error=NotImplementedError,
|
676 |
+
),
|
677 |
+
dict(
|
678 |
+
kwargs=dict(
|
679 |
+
constraints={
|
680 |
+
"+": (3, 5),
|
681 |
+
}
|
682 |
+
),
|
683 |
+
error=NotImplementedError,
|
684 |
+
),
|
685 |
+
dict(
|
686 |
+
kwargs=dict(binary_operators=["α(x, y) = x - y"]),
|
687 |
+
error=ValueError,
|
688 |
+
),
|
689 |
+
dict(
|
690 |
+
kwargs=dict(model_selection="unknown"),
|
691 |
+
error=NotImplementedError,
|
692 |
+
),
|
693 |
+
]
|
694 |
+
for opt in bad_kwargs:
|
695 |
+
model = PySRRegressor(**opt["kwargs"], niterations=1)
|
696 |
+
with self.assertRaises(opt["error"]):
|
697 |
+
model.fit([[1]], [1])
|
698 |
+
model.get_best()
|
699 |
+
print("Failed", opt["kwargs"])
|
700 |
+
|
701 |
def test_pickle_with_temp_equation_file(self):
|
702 |
"""If we have a temporary equation file, unpickle the estimator."""
|
703 |
model = PySRRegressor(
|
|
|
736 |
model = PySRRegressor(
|
737 |
niterations=int(1 + DEFAULT_NITERATIONS / 10),
|
738 |
populations=int(1 + DEFAULT_POPULATIONS / 3),
|
739 |
+
ncycles_per_iteration=int(2 + DEFAULT_NCYCLES / 10),
|
740 |
verbosity=0,
|
741 |
progress=False,
|
742 |
random_state=0,
|
|
|
773 |
def test_param_groupings(self):
|
774 |
"""Test that param_groupings are complete"""
|
775 |
param_groupings_file = Path(__file__).parent.parent / "param_groupings.yml"
|
776 |
+
if not param_groupings_file.exists():
|
777 |
+
return
|
778 |
+
|
779 |
# Read the file, discarding lines ending in ":",
|
780 |
# and removing leading "\s*-\s*":
|
781 |
params = []
|
|
|
1133 |
# TODO: Determine desired behavior if second .fit() call does not have units
|
1134 |
|
1135 |
|
1136 |
+
def runtests(just_tests=False):
|
1137 |
"""Run all tests in test.py."""
|
|
|
|
|
1138 |
test_cases = [
|
1139 |
TestPipeline,
|
1140 |
TestBest,
|
|
|
1143 |
TestLaTeXTable,
|
1144 |
TestDimensionalConstraints,
|
1145 |
]
|
1146 |
+
if just_tests:
|
1147 |
+
return test_cases
|
1148 |
+
suite = unittest.TestSuite()
|
1149 |
+
loader = unittest.TestLoader()
|
1150 |
for test_case in test_cases:
|
1151 |
+
suite.addTests(loader.loadTestsFromTestCase(test_case))
|
|
|
1152 |
runner = unittest.TextTestRunner()
|
1153 |
return runner.run(suite)
|
pysr/test/test_cli.py
CHANGED
@@ -1,59 +1,83 @@
|
|
1 |
import unittest
|
|
|
2 |
|
3 |
from click import testing as click_testing
|
4 |
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
"
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
"
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import unittest
|
2 |
+
from textwrap import dedent
|
3 |
|
4 |
from click import testing as click_testing
|
5 |
|
6 |
+
|
7 |
+
def get_runtests():
|
8 |
+
# Lazy load to avoid circular imports.
|
9 |
+
|
10 |
+
from .._cli.main import pysr
|
11 |
+
|
12 |
+
class TestCli(unittest.TestCase):
|
13 |
+
# TODO: Include test for custom project here.
|
14 |
+
def setUp(self):
|
15 |
+
self.cli_runner = click_testing.CliRunner()
|
16 |
+
|
17 |
+
def test_help_on_all_commands(self):
|
18 |
+
expected = dedent(
|
19 |
+
"""
|
20 |
+
Usage: pysr [OPTIONS] COMMAND [ARGS]...
|
21 |
+
|
22 |
+
Options:
|
23 |
+
--help Show this message and exit.
|
24 |
+
|
25 |
+
Commands:
|
26 |
+
install DEPRECATED (dependencies are now installed at import).
|
27 |
+
test Run parts of the PySR test suite.
|
28 |
+
"""
|
29 |
+
)
|
30 |
+
result = self.cli_runner.invoke(pysr, ["--help"])
|
31 |
+
self.assertEqual(result.output.strip(), expected.strip())
|
32 |
+
self.assertEqual(result.exit_code, 0)
|
33 |
+
|
34 |
+
def test_help_on_install(self):
|
35 |
+
expected = dedent(
|
36 |
+
"""
|
37 |
+
Usage: pysr install [OPTIONS]
|
38 |
+
|
39 |
+
DEPRECATED (dependencies are now installed at import).
|
40 |
+
|
41 |
+
Options:
|
42 |
+
-p, --project TEXT
|
43 |
+
-q, --quiet Disable logging.
|
44 |
+
--precompile
|
45 |
+
--no-precompile
|
46 |
+
--help Show this message and exit.
|
47 |
+
"""
|
48 |
+
)
|
49 |
+
result = self.cli_runner.invoke(pysr, ["install", "--help"])
|
50 |
+
self.assertEqual(result.output.strip(), expected.strip())
|
51 |
+
self.assertEqual(result.exit_code, 0)
|
52 |
+
|
53 |
+
def test_help_on_test(self):
|
54 |
+
expected = dedent(
|
55 |
+
"""
|
56 |
+
Usage: pysr test [OPTIONS] TESTS
|
57 |
+
|
58 |
+
Run parts of the PySR test suite.
|
59 |
+
|
60 |
+
Choose from main, jax, torch, cli, dev, and startup. You can give multiple
|
61 |
+
tests, separated by commas.
|
62 |
+
|
63 |
+
Options:
|
64 |
+
--help Show this message and exit.
|
65 |
+
"""
|
66 |
+
)
|
67 |
+
result = self.cli_runner.invoke(pysr, ["test", "--help"])
|
68 |
+
self.assertEqual(result.output.strip(), expected.strip())
|
69 |
+
self.assertEqual(result.exit_code, 0)
|
70 |
+
|
71 |
+
def runtests(just_tests=False):
|
72 |
+
"""Run all tests in cliTest.py."""
|
73 |
+
tests = [TestCli]
|
74 |
+
if just_tests:
|
75 |
+
return tests
|
76 |
+
loader = unittest.TestLoader()
|
77 |
+
suite = unittest.TestSuite()
|
78 |
+
for test in tests:
|
79 |
+
suite.addTests(loader.loadTestsFromTestCase(test))
|
80 |
+
runner = unittest.TextTestRunner()
|
81 |
+
return runner.run(suite)
|
82 |
+
|
83 |
+
return runtests
|
pysr/test/test_dev.py
ADDED
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import subprocess
|
3 |
+
import unittest
|
4 |
+
from pathlib import Path
|
5 |
+
|
6 |
+
|
7 |
+
class TestDev(unittest.TestCase):
|
8 |
+
def test_simple_change_to_backend(self):
|
9 |
+
"""Test that we can use a development version of SymbolicRegression.jl"""
|
10 |
+
PYSR_TEST_JULIA_VERSION = os.environ.get("PYSR_TEST_JULIA_VERSION", "1.6")
|
11 |
+
PYSR_TEST_PYTHON_VERSION = os.environ.get("PYSR_TEST_PYTHON_VERSION", "3.9")
|
12 |
+
build_result = subprocess.run(
|
13 |
+
[
|
14 |
+
"docker",
|
15 |
+
"build",
|
16 |
+
"-t",
|
17 |
+
"pysr-dev",
|
18 |
+
"--build-arg",
|
19 |
+
f"JLVERSION={PYSR_TEST_JULIA_VERSION}",
|
20 |
+
"--build-arg",
|
21 |
+
f"PYVERSION={PYSR_TEST_PYTHON_VERSION}",
|
22 |
+
"-f",
|
23 |
+
"pysr/test/test_dev_pysr.dockerfile",
|
24 |
+
".",
|
25 |
+
],
|
26 |
+
env=os.environ,
|
27 |
+
cwd=Path(__file__).parent.parent.parent,
|
28 |
+
universal_newlines=True,
|
29 |
+
)
|
30 |
+
self.assertEqual(build_result.returncode, 0)
|
31 |
+
test_result = subprocess.run(
|
32 |
+
[
|
33 |
+
"docker",
|
34 |
+
"run",
|
35 |
+
"--rm",
|
36 |
+
"pysr-dev",
|
37 |
+
"python3",
|
38 |
+
"-c",
|
39 |
+
"from pysr import SymbolicRegression as SR; print(SR.__test_function())",
|
40 |
+
],
|
41 |
+
stdout=subprocess.PIPE,
|
42 |
+
stderr=subprocess.PIPE,
|
43 |
+
env=os.environ,
|
44 |
+
cwd=Path(__file__).parent.parent.parent,
|
45 |
+
)
|
46 |
+
self.assertEqual(test_result.returncode, 0)
|
47 |
+
self.assertEqual(test_result.stdout.decode("utf-8").strip(), "2.3")
|
48 |
+
|
49 |
+
|
50 |
+
def runtests(just_tests=False):
|
51 |
+
tests = [TestDev]
|
52 |
+
if just_tests:
|
53 |
+
return tests
|
54 |
+
suite = unittest.TestSuite()
|
55 |
+
loader = unittest.TestLoader()
|
56 |
+
for test in tests:
|
57 |
+
suite.addTests(loader.loadTestsFromTestCase(test))
|
58 |
+
runner = unittest.TextTestRunner()
|
59 |
+
return runner.run(suite)
|
pysr/test/test_dev_pysr.dockerfile
ADDED
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# This dockerfile simulates a user installation that
|
2 |
+
# tries to manually edit SymbolicRegression.jl and
|
3 |
+
# use it from PySR.
|
4 |
+
|
5 |
+
ARG JLVERSION=1.9.4
|
6 |
+
ARG PYVERSION=3.11.6
|
7 |
+
ARG BASE_IMAGE=bullseye
|
8 |
+
|
9 |
+
FROM julia:${JLVERSION}-${BASE_IMAGE} AS jl
|
10 |
+
FROM python:${PYVERSION}-${BASE_IMAGE}
|
11 |
+
|
12 |
+
# Merge Julia image:
|
13 |
+
COPY --from=jl /usr/local/julia /usr/local/julia
|
14 |
+
ENV PATH="/usr/local/julia/bin:${PATH}"
|
15 |
+
|
16 |
+
WORKDIR /pysr
|
17 |
+
|
18 |
+
# Caches install (https://stackoverflow.com/questions/25305788/how-to-avoid-reinstalling-packages-when-building-docker-image-for-python-project)
|
19 |
+
ADD ./requirements.txt /pysr/requirements.txt
|
20 |
+
RUN pip3 install --no-cache-dir -r /pysr/requirements.txt
|
21 |
+
|
22 |
+
# Install PySR:
|
23 |
+
# We do a minimal copy so it doesn't need to rerun at every file change:
|
24 |
+
ADD ./pyproject.toml /pysr/pyproject.toml
|
25 |
+
ADD ./setup.py /pysr/setup.py
|
26 |
+
|
27 |
+
RUN mkdir /pysr/pysr
|
28 |
+
ADD ./pysr/*.py /pysr/pysr/
|
29 |
+
ADD ./pysr/juliapkg.json /pysr/pysr/juliapkg.json
|
30 |
+
|
31 |
+
RUN mkdir /pysr/pysr/_cli
|
32 |
+
ADD ./pysr/_cli/*.py /pysr/pysr/_cli/
|
33 |
+
|
34 |
+
RUN mkdir /pysr/pysr/test
|
35 |
+
|
36 |
+
RUN pip3 install --no-cache-dir .
|
37 |
+
|
38 |
+
# Now, we create a custom version of SymbolicRegression.jl
|
39 |
+
# First, we get the version from juliapkg.json:
|
40 |
+
RUN python3 -c 'import json; print(json.load(open("/pysr/pysr/juliapkg.json", "r"))["packages"]["SymbolicRegression"]["version"])' > /pysr/sr_version
|
41 |
+
|
42 |
+
# Remove any = or ^ or ~ from the version:
|
43 |
+
RUN cat /pysr/sr_version | sed 's/[\^=~]//g' > /pysr/sr_version_processed
|
44 |
+
|
45 |
+
# Now, we check out the version of SymbolicRegression.jl that PySR is using:
|
46 |
+
RUN git clone -b "v$(cat /pysr/sr_version_processed)" --single-branch https://github.com/MilesCranmer/SymbolicRegression.jl /srjl
|
47 |
+
|
48 |
+
# Edit SymbolicRegression.jl to create a new function.
|
49 |
+
# We want to put this function immediately after `module SymbolicRegression`:
|
50 |
+
RUN sed -i 's/module SymbolicRegression/module SymbolicRegression\n__test_function() = 2.3/' /srjl/src/SymbolicRegression.jl
|
51 |
+
|
52 |
+
# Edit PySR to use the custom version of SymbolicRegression.jl:
|
53 |
+
ADD ./pysr/test/generate_dev_juliapkg.py /generate_dev_juliapkg.py
|
54 |
+
RUN python3 /generate_dev_juliapkg.py /pysr/pysr/juliapkg.json /srjl
|
55 |
+
|
56 |
+
# Precompile
|
57 |
+
RUN python3 -c 'import pysr'
|
pysr/test/test_env.py
DELETED
@@ -1,58 +0,0 @@
|
|
1 |
-
"""Contains tests for creating and initializing custom Julia projects."""
|
2 |
-
|
3 |
-
import os
|
4 |
-
import unittest
|
5 |
-
from tempfile import TemporaryDirectory
|
6 |
-
|
7 |
-
from .. import julia_helpers
|
8 |
-
|
9 |
-
|
10 |
-
class TestJuliaProject(unittest.TestCase):
|
11 |
-
"""Various tests for working with Julia projects."""
|
12 |
-
|
13 |
-
def test_custom_shared_env(self):
|
14 |
-
"""Test that we can use PySR in a custom shared env."""
|
15 |
-
with TemporaryDirectory() as tmpdir:
|
16 |
-
# Create a temp depot to store julia packages (and our custom env)
|
17 |
-
Main = julia_helpers.init_julia()
|
18 |
-
|
19 |
-
# Set up env:
|
20 |
-
if "JULIA_DEPOT_PATH" not in os.environ:
|
21 |
-
old_env = None
|
22 |
-
os.environ["JULIA_DEPOT_PATH"] = tmpdir
|
23 |
-
else:
|
24 |
-
old_env = os.environ["JULIA_DEPOT_PATH"]
|
25 |
-
os.environ[
|
26 |
-
"JULIA_DEPOT_PATH"
|
27 |
-
] = f"{tmpdir}:{os.environ['JULIA_DEPOT_PATH']}"
|
28 |
-
Main.eval(
|
29 |
-
f'pushfirst!(DEPOT_PATH, "{julia_helpers._escape_filename(tmpdir)}")'
|
30 |
-
)
|
31 |
-
test_env_name = "@pysr_test_env"
|
32 |
-
julia_helpers.install(julia_project=test_env_name)
|
33 |
-
Main = julia_helpers.init_julia(julia_project=test_env_name)
|
34 |
-
|
35 |
-
# Try to use env:
|
36 |
-
Main.eval("using SymbolicRegression")
|
37 |
-
Main.eval("using Pkg")
|
38 |
-
|
39 |
-
# Assert we actually loaded it:
|
40 |
-
cur_project_dir = Main.eval("splitdir(dirname(Base.active_project()))[1]")
|
41 |
-
potential_shared_project_dirs = Main.eval("Pkg.envdir(DEPOT_PATH[1])")
|
42 |
-
self.assertEqual(cur_project_dir, potential_shared_project_dirs)
|
43 |
-
|
44 |
-
# Clean up:
|
45 |
-
Main.eval("pop!(DEPOT_PATH)")
|
46 |
-
if old_env is None:
|
47 |
-
del os.environ["JULIA_DEPOT_PATH"]
|
48 |
-
else:
|
49 |
-
os.environ["JULIA_DEPOT_PATH"] = old_env
|
50 |
-
|
51 |
-
|
52 |
-
def runtests():
|
53 |
-
"""Run all tests in test_env.py."""
|
54 |
-
loader = unittest.TestLoader()
|
55 |
-
suite = unittest.TestSuite()
|
56 |
-
suite.addTests(loader.loadTestsFromTestCase(TestJuliaProject))
|
57 |
-
runner = unittest.TextTestRunner()
|
58 |
-
return runner.run(suite)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
pysr/test/test_jax.py
CHANGED
@@ -121,10 +121,14 @@ class TestJAX(unittest.TestCase):
|
|
121 |
np.testing.assert_almost_equal(y.values, jax_output, decimal=3)
|
122 |
|
123 |
|
124 |
-
def runtests():
|
125 |
"""Run all tests in test_jax.py."""
|
|
|
|
|
|
|
126 |
loader = unittest.TestLoader()
|
127 |
suite = unittest.TestSuite()
|
128 |
-
|
|
|
129 |
runner = unittest.TextTestRunner()
|
130 |
return runner.run(suite)
|
|
|
121 |
np.testing.assert_almost_equal(y.values, jax_output, decimal=3)
|
122 |
|
123 |
|
124 |
+
def runtests(just_tests=False):
|
125 |
"""Run all tests in test_jax.py."""
|
126 |
+
tests = [TestJAX]
|
127 |
+
if just_tests:
|
128 |
+
return tests
|
129 |
loader = unittest.TestLoader()
|
130 |
suite = unittest.TestSuite()
|
131 |
+
for test in tests:
|
132 |
+
suite.addTests(loader.loadTestsFromTestCase(test))
|
133 |
runner = unittest.TextTestRunner()
|
134 |
return runner.run(suite)
|
pysr/test/test_nb.ipynb
ADDED
@@ -0,0 +1,536 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": 1,
|
6 |
+
"metadata": {},
|
7 |
+
"outputs": [],
|
8 |
+
"source": [
|
9 |
+
"# NBVAL_IGNORE_OUTPUT\n",
|
10 |
+
"import numpy as np\n",
|
11 |
+
"from pysr import PySRRegressor, jl"
|
12 |
+
]
|
13 |
+
},
|
14 |
+
{
|
15 |
+
"cell_type": "code",
|
16 |
+
"execution_count": 2,
|
17 |
+
"metadata": {},
|
18 |
+
"outputs": [
|
19 |
+
{
|
20 |
+
"name": "stdout",
|
21 |
+
"output_type": "stream",
|
22 |
+
"text": [
|
23 |
+
"3\n"
|
24 |
+
]
|
25 |
+
}
|
26 |
+
],
|
27 |
+
"source": [
|
28 |
+
"%%julia\n",
|
29 |
+
"\n",
|
30 |
+
"# Automatically activates Julia magic\n",
|
31 |
+
"\n",
|
32 |
+
"x = 1\n",
|
33 |
+
"println(x + 2)"
|
34 |
+
]
|
35 |
+
},
|
36 |
+
{
|
37 |
+
"cell_type": "code",
|
38 |
+
"execution_count": 3,
|
39 |
+
"metadata": {},
|
40 |
+
"outputs": [
|
41 |
+
{
|
42 |
+
"name": "stdout",
|
43 |
+
"output_type": "stream",
|
44 |
+
"text": [
|
45 |
+
"4\n"
|
46 |
+
]
|
47 |
+
}
|
48 |
+
],
|
49 |
+
"source": [
|
50 |
+
"%julia println(x + 3)"
|
51 |
+
]
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"cell_type": "code",
|
55 |
+
"execution_count": 4,
|
56 |
+
"metadata": {},
|
57 |
+
"outputs": [
|
58 |
+
{
|
59 |
+
"data": {
|
60 |
+
"text/html": [
|
61 |
+
"<style>#sk-container-id-1 {\n",
|
62 |
+
" /* Definition of color scheme common for light and dark mode */\n",
|
63 |
+
" --sklearn-color-text: black;\n",
|
64 |
+
" --sklearn-color-line: gray;\n",
|
65 |
+
" /* Definition of color scheme for unfitted estimators */\n",
|
66 |
+
" --sklearn-color-unfitted-level-0: #fff5e6;\n",
|
67 |
+
" --sklearn-color-unfitted-level-1: #f6e4d2;\n",
|
68 |
+
" --sklearn-color-unfitted-level-2: #ffe0b3;\n",
|
69 |
+
" --sklearn-color-unfitted-level-3: chocolate;\n",
|
70 |
+
" /* Definition of color scheme for fitted estimators */\n",
|
71 |
+
" --sklearn-color-fitted-level-0: #f0f8ff;\n",
|
72 |
+
" --sklearn-color-fitted-level-1: #d4ebff;\n",
|
73 |
+
" --sklearn-color-fitted-level-2: #b3dbfd;\n",
|
74 |
+
" --sklearn-color-fitted-level-3: cornflowerblue;\n",
|
75 |
+
"\n",
|
76 |
+
" /* Specific color for light theme */\n",
|
77 |
+
" --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
|
78 |
+
" --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
|
79 |
+
" --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
|
80 |
+
" --sklearn-color-icon: #696969;\n",
|
81 |
+
"\n",
|
82 |
+
" @media (prefers-color-scheme: dark) {\n",
|
83 |
+
" /* Redefinition of color scheme for dark theme */\n",
|
84 |
+
" --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
|
85 |
+
" --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
|
86 |
+
" --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
|
87 |
+
" --sklearn-color-icon: #878787;\n",
|
88 |
+
" }\n",
|
89 |
+
"}\n",
|
90 |
+
"\n",
|
91 |
+
"#sk-container-id-1 {\n",
|
92 |
+
" color: var(--sklearn-color-text);\n",
|
93 |
+
"}\n",
|
94 |
+
"\n",
|
95 |
+
"#sk-container-id-1 pre {\n",
|
96 |
+
" padding: 0;\n",
|
97 |
+
"}\n",
|
98 |
+
"\n",
|
99 |
+
"#sk-container-id-1 input.sk-hidden--visually {\n",
|
100 |
+
" border: 0;\n",
|
101 |
+
" clip: rect(1px 1px 1px 1px);\n",
|
102 |
+
" clip: rect(1px, 1px, 1px, 1px);\n",
|
103 |
+
" height: 1px;\n",
|
104 |
+
" margin: -1px;\n",
|
105 |
+
" overflow: hidden;\n",
|
106 |
+
" padding: 0;\n",
|
107 |
+
" position: absolute;\n",
|
108 |
+
" width: 1px;\n",
|
109 |
+
"}\n",
|
110 |
+
"\n",
|
111 |
+
"#sk-container-id-1 div.sk-dashed-wrapped {\n",
|
112 |
+
" border: 1px dashed var(--sklearn-color-line);\n",
|
113 |
+
" margin: 0 0.4em 0.5em 0.4em;\n",
|
114 |
+
" box-sizing: border-box;\n",
|
115 |
+
" padding-bottom: 0.4em;\n",
|
116 |
+
" background-color: var(--sklearn-color-background);\n",
|
117 |
+
"}\n",
|
118 |
+
"\n",
|
119 |
+
"#sk-container-id-1 div.sk-container {\n",
|
120 |
+
" /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
|
121 |
+
" but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
|
122 |
+
" so we also need the `!important` here to be able to override the\n",
|
123 |
+
" default hidden behavior on the sphinx rendered scikit-learn.org.\n",
|
124 |
+
" See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
|
125 |
+
" display: inline-block !important;\n",
|
126 |
+
" position: relative;\n",
|
127 |
+
"}\n",
|
128 |
+
"\n",
|
129 |
+
"#sk-container-id-1 div.sk-text-repr-fallback {\n",
|
130 |
+
" display: none;\n",
|
131 |
+
"}\n",
|
132 |
+
"\n",
|
133 |
+
"div.sk-parallel-item,\n",
|
134 |
+
"div.sk-serial,\n",
|
135 |
+
"div.sk-item {\n",
|
136 |
+
" /* draw centered vertical line to link estimators */\n",
|
137 |
+
" background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
|
138 |
+
" background-size: 2px 100%;\n",
|
139 |
+
" background-repeat: no-repeat;\n",
|
140 |
+
" background-position: center center;\n",
|
141 |
+
"}\n",
|
142 |
+
"\n",
|
143 |
+
"/* Parallel-specific style estimator block */\n",
|
144 |
+
"\n",
|
145 |
+
"#sk-container-id-1 div.sk-parallel-item::after {\n",
|
146 |
+
" content: \"\";\n",
|
147 |
+
" width: 100%;\n",
|
148 |
+
" border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
|
149 |
+
" flex-grow: 1;\n",
|
150 |
+
"}\n",
|
151 |
+
"\n",
|
152 |
+
"#sk-container-id-1 div.sk-parallel {\n",
|
153 |
+
" display: flex;\n",
|
154 |
+
" align-items: stretch;\n",
|
155 |
+
" justify-content: center;\n",
|
156 |
+
" background-color: var(--sklearn-color-background);\n",
|
157 |
+
" position: relative;\n",
|
158 |
+
"}\n",
|
159 |
+
"\n",
|
160 |
+
"#sk-container-id-1 div.sk-parallel-item {\n",
|
161 |
+
" display: flex;\n",
|
162 |
+
" flex-direction: column;\n",
|
163 |
+
"}\n",
|
164 |
+
"\n",
|
165 |
+
"#sk-container-id-1 div.sk-parallel-item:first-child::after {\n",
|
166 |
+
" align-self: flex-end;\n",
|
167 |
+
" width: 50%;\n",
|
168 |
+
"}\n",
|
169 |
+
"\n",
|
170 |
+
"#sk-container-id-1 div.sk-parallel-item:last-child::after {\n",
|
171 |
+
" align-self: flex-start;\n",
|
172 |
+
" width: 50%;\n",
|
173 |
+
"}\n",
|
174 |
+
"\n",
|
175 |
+
"#sk-container-id-1 div.sk-parallel-item:only-child::after {\n",
|
176 |
+
" width: 0;\n",
|
177 |
+
"}\n",
|
178 |
+
"\n",
|
179 |
+
"/* Serial-specific style estimator block */\n",
|
180 |
+
"\n",
|
181 |
+
"#sk-container-id-1 div.sk-serial {\n",
|
182 |
+
" display: flex;\n",
|
183 |
+
" flex-direction: column;\n",
|
184 |
+
" align-items: center;\n",
|
185 |
+
" background-color: var(--sklearn-color-background);\n",
|
186 |
+
" padding-right: 1em;\n",
|
187 |
+
" padding-left: 1em;\n",
|
188 |
+
"}\n",
|
189 |
+
"\n",
|
190 |
+
"\n",
|
191 |
+
"/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
|
192 |
+
"clickable and can be expanded/collapsed.\n",
|
193 |
+
"- Pipeline and ColumnTransformer use this feature and define the default style\n",
|
194 |
+
"- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
|
195 |
+
"*/\n",
|
196 |
+
"\n",
|
197 |
+
"/* Pipeline and ColumnTransformer style (default) */\n",
|
198 |
+
"\n",
|
199 |
+
"#sk-container-id-1 div.sk-toggleable {\n",
|
200 |
+
" /* Default theme specific background. It is overwritten whether we have a\n",
|
201 |
+
" specific estimator or a Pipeline/ColumnTransformer */\n",
|
202 |
+
" background-color: var(--sklearn-color-background);\n",
|
203 |
+
"}\n",
|
204 |
+
"\n",
|
205 |
+
"/* Toggleable label */\n",
|
206 |
+
"#sk-container-id-1 label.sk-toggleable__label {\n",
|
207 |
+
" cursor: pointer;\n",
|
208 |
+
" display: block;\n",
|
209 |
+
" width: 100%;\n",
|
210 |
+
" margin-bottom: 0;\n",
|
211 |
+
" padding: 0.5em;\n",
|
212 |
+
" box-sizing: border-box;\n",
|
213 |
+
" text-align: center;\n",
|
214 |
+
"}\n",
|
215 |
+
"\n",
|
216 |
+
"#sk-container-id-1 label.sk-toggleable__label-arrow:before {\n",
|
217 |
+
" /* Arrow on the left of the label */\n",
|
218 |
+
" content: \"▸\";\n",
|
219 |
+
" float: left;\n",
|
220 |
+
" margin-right: 0.25em;\n",
|
221 |
+
" color: var(--sklearn-color-icon);\n",
|
222 |
+
"}\n",
|
223 |
+
"\n",
|
224 |
+
"#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {\n",
|
225 |
+
" color: var(--sklearn-color-text);\n",
|
226 |
+
"}\n",
|
227 |
+
"\n",
|
228 |
+
"/* Toggleable content - dropdown */\n",
|
229 |
+
"\n",
|
230 |
+
"#sk-container-id-1 div.sk-toggleable__content {\n",
|
231 |
+
" max-height: 0;\n",
|
232 |
+
" max-width: 0;\n",
|
233 |
+
" overflow: hidden;\n",
|
234 |
+
" text-align: left;\n",
|
235 |
+
" /* unfitted */\n",
|
236 |
+
" background-color: var(--sklearn-color-unfitted-level-0);\n",
|
237 |
+
"}\n",
|
238 |
+
"\n",
|
239 |
+
"#sk-container-id-1 div.sk-toggleable__content.fitted {\n",
|
240 |
+
" /* fitted */\n",
|
241 |
+
" background-color: var(--sklearn-color-fitted-level-0);\n",
|
242 |
+
"}\n",
|
243 |
+
"\n",
|
244 |
+
"#sk-container-id-1 div.sk-toggleable__content pre {\n",
|
245 |
+
" margin: 0.2em;\n",
|
246 |
+
" border-radius: 0.25em;\n",
|
247 |
+
" color: var(--sklearn-color-text);\n",
|
248 |
+
" /* unfitted */\n",
|
249 |
+
" background-color: var(--sklearn-color-unfitted-level-0);\n",
|
250 |
+
"}\n",
|
251 |
+
"\n",
|
252 |
+
"#sk-container-id-1 div.sk-toggleable__content.fitted pre {\n",
|
253 |
+
" /* unfitted */\n",
|
254 |
+
" background-color: var(--sklearn-color-fitted-level-0);\n",
|
255 |
+
"}\n",
|
256 |
+
"\n",
|
257 |
+
"#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
|
258 |
+
" /* Expand drop-down */\n",
|
259 |
+
" max-height: 200px;\n",
|
260 |
+
" max-width: 100%;\n",
|
261 |
+
" overflow: auto;\n",
|
262 |
+
"}\n",
|
263 |
+
"\n",
|
264 |
+
"#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
|
265 |
+
" content: \"▾\";\n",
|
266 |
+
"}\n",
|
267 |
+
"\n",
|
268 |
+
"/* Pipeline/ColumnTransformer-specific style */\n",
|
269 |
+
"\n",
|
270 |
+
"#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
271 |
+
" color: var(--sklearn-color-text);\n",
|
272 |
+
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
273 |
+
"}\n",
|
274 |
+
"\n",
|
275 |
+
"#sk-container-id-1 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
276 |
+
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
277 |
+
"}\n",
|
278 |
+
"\n",
|
279 |
+
"/* Estimator-specific style */\n",
|
280 |
+
"\n",
|
281 |
+
"/* Colorize estimator box */\n",
|
282 |
+
"#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
283 |
+
" /* unfitted */\n",
|
284 |
+
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
285 |
+
"}\n",
|
286 |
+
"\n",
|
287 |
+
"#sk-container-id-1 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
288 |
+
" /* fitted */\n",
|
289 |
+
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
290 |
+
"}\n",
|
291 |
+
"\n",
|
292 |
+
"#sk-container-id-1 div.sk-label label.sk-toggleable__label,\n",
|
293 |
+
"#sk-container-id-1 div.sk-label label {\n",
|
294 |
+
" /* The background is the default theme color */\n",
|
295 |
+
" color: var(--sklearn-color-text-on-default-background);\n",
|
296 |
+
"}\n",
|
297 |
+
"\n",
|
298 |
+
"/* On hover, darken the color of the background */\n",
|
299 |
+
"#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label {\n",
|
300 |
+
" color: var(--sklearn-color-text);\n",
|
301 |
+
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
302 |
+
"}\n",
|
303 |
+
"\n",
|
304 |
+
"/* Label box, darken color on hover, fitted */\n",
|
305 |
+
"#sk-container-id-1 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
|
306 |
+
" color: var(--sklearn-color-text);\n",
|
307 |
+
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
308 |
+
"}\n",
|
309 |
+
"\n",
|
310 |
+
"/* Estimator label */\n",
|
311 |
+
"\n",
|
312 |
+
"#sk-container-id-1 div.sk-label label {\n",
|
313 |
+
" font-family: monospace;\n",
|
314 |
+
" font-weight: bold;\n",
|
315 |
+
" display: inline-block;\n",
|
316 |
+
" line-height: 1.2em;\n",
|
317 |
+
"}\n",
|
318 |
+
"\n",
|
319 |
+
"#sk-container-id-1 div.sk-label-container {\n",
|
320 |
+
" text-align: center;\n",
|
321 |
+
"}\n",
|
322 |
+
"\n",
|
323 |
+
"/* Estimator-specific */\n",
|
324 |
+
"#sk-container-id-1 div.sk-estimator {\n",
|
325 |
+
" font-family: monospace;\n",
|
326 |
+
" border: 1px dotted var(--sklearn-color-border-box);\n",
|
327 |
+
" border-radius: 0.25em;\n",
|
328 |
+
" box-sizing: border-box;\n",
|
329 |
+
" margin-bottom: 0.5em;\n",
|
330 |
+
" /* unfitted */\n",
|
331 |
+
" background-color: var(--sklearn-color-unfitted-level-0);\n",
|
332 |
+
"}\n",
|
333 |
+
"\n",
|
334 |
+
"#sk-container-id-1 div.sk-estimator.fitted {\n",
|
335 |
+
" /* fitted */\n",
|
336 |
+
" background-color: var(--sklearn-color-fitted-level-0);\n",
|
337 |
+
"}\n",
|
338 |
+
"\n",
|
339 |
+
"/* on hover */\n",
|
340 |
+
"#sk-container-id-1 div.sk-estimator:hover {\n",
|
341 |
+
" /* unfitted */\n",
|
342 |
+
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
343 |
+
"}\n",
|
344 |
+
"\n",
|
345 |
+
"#sk-container-id-1 div.sk-estimator.fitted:hover {\n",
|
346 |
+
" /* fitted */\n",
|
347 |
+
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
348 |
+
"}\n",
|
349 |
+
"\n",
|
350 |
+
"/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
|
351 |
+
"\n",
|
352 |
+
"/* Common style for \"i\" and \"?\" */\n",
|
353 |
+
"\n",
|
354 |
+
".sk-estimator-doc-link,\n",
|
355 |
+
"a:link.sk-estimator-doc-link,\n",
|
356 |
+
"a:visited.sk-estimator-doc-link {\n",
|
357 |
+
" float: right;\n",
|
358 |
+
" font-size: smaller;\n",
|
359 |
+
" line-height: 1em;\n",
|
360 |
+
" font-family: monospace;\n",
|
361 |
+
" background-color: var(--sklearn-color-background);\n",
|
362 |
+
" border-radius: 1em;\n",
|
363 |
+
" height: 1em;\n",
|
364 |
+
" width: 1em;\n",
|
365 |
+
" text-decoration: none !important;\n",
|
366 |
+
" margin-left: 1ex;\n",
|
367 |
+
" /* unfitted */\n",
|
368 |
+
" border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
|
369 |
+
" color: var(--sklearn-color-unfitted-level-1);\n",
|
370 |
+
"}\n",
|
371 |
+
"\n",
|
372 |
+
".sk-estimator-doc-link.fitted,\n",
|
373 |
+
"a:link.sk-estimator-doc-link.fitted,\n",
|
374 |
+
"a:visited.sk-estimator-doc-link.fitted {\n",
|
375 |
+
" /* fitted */\n",
|
376 |
+
" border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
|
377 |
+
" color: var(--sklearn-color-fitted-level-1);\n",
|
378 |
+
"}\n",
|
379 |
+
"\n",
|
380 |
+
"/* On hover */\n",
|
381 |
+
"div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
|
382 |
+
".sk-estimator-doc-link:hover,\n",
|
383 |
+
"div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
|
384 |
+
".sk-estimator-doc-link:hover {\n",
|
385 |
+
" /* unfitted */\n",
|
386 |
+
" background-color: var(--sklearn-color-unfitted-level-3);\n",
|
387 |
+
" color: var(--sklearn-color-background);\n",
|
388 |
+
" text-decoration: none;\n",
|
389 |
+
"}\n",
|
390 |
+
"\n",
|
391 |
+
"div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
|
392 |
+
".sk-estimator-doc-link.fitted:hover,\n",
|
393 |
+
"div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
|
394 |
+
".sk-estimator-doc-link.fitted:hover {\n",
|
395 |
+
" /* fitted */\n",
|
396 |
+
" background-color: var(--sklearn-color-fitted-level-3);\n",
|
397 |
+
" color: var(--sklearn-color-background);\n",
|
398 |
+
" text-decoration: none;\n",
|
399 |
+
"}\n",
|
400 |
+
"\n",
|
401 |
+
"/* Span, style for the box shown on hovering the info icon */\n",
|
402 |
+
".sk-estimator-doc-link span {\n",
|
403 |
+
" display: none;\n",
|
404 |
+
" z-index: 9999;\n",
|
405 |
+
" position: relative;\n",
|
406 |
+
" font-weight: normal;\n",
|
407 |
+
" right: .2ex;\n",
|
408 |
+
" padding: .5ex;\n",
|
409 |
+
" margin: .5ex;\n",
|
410 |
+
" width: min-content;\n",
|
411 |
+
" min-width: 20ex;\n",
|
412 |
+
" max-width: 50ex;\n",
|
413 |
+
" color: var(--sklearn-color-text);\n",
|
414 |
+
" box-shadow: 2pt 2pt 4pt #999;\n",
|
415 |
+
" /* unfitted */\n",
|
416 |
+
" background: var(--sklearn-color-unfitted-level-0);\n",
|
417 |
+
" border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
|
418 |
+
"}\n",
|
419 |
+
"\n",
|
420 |
+
".sk-estimator-doc-link.fitted span {\n",
|
421 |
+
" /* fitted */\n",
|
422 |
+
" background: var(--sklearn-color-fitted-level-0);\n",
|
423 |
+
" border: var(--sklearn-color-fitted-level-3);\n",
|
424 |
+
"}\n",
|
425 |
+
"\n",
|
426 |
+
".sk-estimator-doc-link:hover span {\n",
|
427 |
+
" display: block;\n",
|
428 |
+
"}\n",
|
429 |
+
"\n",
|
430 |
+
"/* \"?\"-specific style due to the `<a>` HTML tag */\n",
|
431 |
+
"\n",
|
432 |
+
"#sk-container-id-1 a.estimator_doc_link {\n",
|
433 |
+
" float: right;\n",
|
434 |
+
" font-size: 1rem;\n",
|
435 |
+
" line-height: 1em;\n",
|
436 |
+
" font-family: monospace;\n",
|
437 |
+
" background-color: var(--sklearn-color-background);\n",
|
438 |
+
" border-radius: 1rem;\n",
|
439 |
+
" height: 1rem;\n",
|
440 |
+
" width: 1rem;\n",
|
441 |
+
" text-decoration: none;\n",
|
442 |
+
" /* unfitted */\n",
|
443 |
+
" color: var(--sklearn-color-unfitted-level-1);\n",
|
444 |
+
" border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
|
445 |
+
"}\n",
|
446 |
+
"\n",
|
447 |
+
"#sk-container-id-1 a.estimator_doc_link.fitted {\n",
|
448 |
+
" /* fitted */\n",
|
449 |
+
" border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
|
450 |
+
" color: var(--sklearn-color-fitted-level-1);\n",
|
451 |
+
"}\n",
|
452 |
+
"\n",
|
453 |
+
"/* On hover */\n",
|
454 |
+
"#sk-container-id-1 a.estimator_doc_link:hover {\n",
|
455 |
+
" /* unfitted */\n",
|
456 |
+
" background-color: var(--sklearn-color-unfitted-level-3);\n",
|
457 |
+
" color: var(--sklearn-color-background);\n",
|
458 |
+
" text-decoration: none;\n",
|
459 |
+
"}\n",
|
460 |
+
"\n",
|
461 |
+
"#sk-container-id-1 a.estimator_doc_link.fitted:hover {\n",
|
462 |
+
" /* fitted */\n",
|
463 |
+
" background-color: var(--sklearn-color-fitted-level-3);\n",
|
464 |
+
"}\n",
|
465 |
+
"</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>PySRRegressor.equations_ = None</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" checked><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow \"> PySRRegressor<span class=\"sk-estimator-doc-link \">i<span>Not fitted</span></span></label><div class=\"sk-toggleable__content \"><pre>PySRRegressor.equations_ = None</pre></div> </div></div></div></div>"
|
466 |
+
],
|
467 |
+
"text/plain": [
|
468 |
+
"PySRRegressor.equations_ = None"
|
469 |
+
]
|
470 |
+
},
|
471 |
+
"execution_count": 4,
|
472 |
+
"metadata": {},
|
473 |
+
"output_type": "execute_result"
|
474 |
+
}
|
475 |
+
],
|
476 |
+
"source": [
|
477 |
+
"rstate = np.random.RandomState(0)\n",
|
478 |
+
"X = np.random.randn(10, 2)\n",
|
479 |
+
"y = np.random.randn(10)\n",
|
480 |
+
"\n",
|
481 |
+
"model = PySRRegressor(deterministic=True, multithreading=False, procs=0, random_state=0, verbosity=0, progress=False)\n",
|
482 |
+
"model"
|
483 |
+
]
|
484 |
+
},
|
485 |
+
{
|
486 |
+
"cell_type": "code",
|
487 |
+
"execution_count": 5,
|
488 |
+
"metadata": {},
|
489 |
+
"outputs": [
|
490 |
+
{
|
491 |
+
"name": "stderr",
|
492 |
+
"output_type": "stream",
|
493 |
+
"text": [
|
494 |
+
"/Users/mcranmer/PermaDocuments/SymbolicRegressionMonorepo/.venv/lib/python3.12/site-packages/pysr/sr.py:1297: UserWarning: Note: it looks like you are running in Jupyter. The progress bar will be turned off.\n",
|
495 |
+
" warnings.warn(\n"
|
496 |
+
]
|
497 |
+
},
|
498 |
+
{
|
499 |
+
"data": {
|
500 |
+
"text/plain": [
|
501 |
+
"pandas.core.frame.DataFrame"
|
502 |
+
]
|
503 |
+
},
|
504 |
+
"execution_count": 5,
|
505 |
+
"metadata": {},
|
506 |
+
"output_type": "execute_result"
|
507 |
+
}
|
508 |
+
],
|
509 |
+
"source": [
|
510 |
+
"model.fit(X, y)\n",
|
511 |
+
"type(model.equations_)"
|
512 |
+
]
|
513 |
+
}
|
514 |
+
],
|
515 |
+
"metadata": {
|
516 |
+
"kernelspec": {
|
517 |
+
"display_name": "Python 3 (ipykernel)",
|
518 |
+
"language": "python",
|
519 |
+
"name": "python3"
|
520 |
+
},
|
521 |
+
"language_info": {
|
522 |
+
"codemirror_mode": {
|
523 |
+
"name": "ipython",
|
524 |
+
"version": 3
|
525 |
+
},
|
526 |
+
"file_extension": ".py",
|
527 |
+
"mimetype": "text/x-python",
|
528 |
+
"name": "python",
|
529 |
+
"nbconvert_exporter": "python",
|
530 |
+
"pygments_lexer": "ipython3",
|
531 |
+
"version": "3.12.0"
|
532 |
+
}
|
533 |
+
},
|
534 |
+
"nbformat": 4,
|
535 |
+
"nbformat_minor": 2
|
536 |
+
}
|
pysr/test/test_startup.py
ADDED
@@ -0,0 +1,164 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import platform
|
3 |
+
import subprocess
|
4 |
+
import sys
|
5 |
+
import tempfile
|
6 |
+
import textwrap
|
7 |
+
import unittest
|
8 |
+
from pathlib import Path
|
9 |
+
|
10 |
+
import numpy as np
|
11 |
+
|
12 |
+
from .. import PySRRegressor
|
13 |
+
from ..julia_import import jl_version
|
14 |
+
from .params import DEFAULT_NITERATIONS, DEFAULT_POPULATIONS
|
15 |
+
|
16 |
+
|
17 |
+
class TestStartup(unittest.TestCase):
|
18 |
+
"""Various tests related to starting up PySR."""
|
19 |
+
|
20 |
+
def setUp(self):
|
21 |
+
# Using inspect,
|
22 |
+
# get default niterations from PySRRegressor, and double them:
|
23 |
+
self.default_test_kwargs = dict(
|
24 |
+
progress=False,
|
25 |
+
model_selection="accuracy",
|
26 |
+
niterations=DEFAULT_NITERATIONS * 2,
|
27 |
+
populations=DEFAULT_POPULATIONS * 2,
|
28 |
+
temp_equation_file=True,
|
29 |
+
)
|
30 |
+
self.rstate = np.random.RandomState(0)
|
31 |
+
self.X = self.rstate.randn(100, 5)
|
32 |
+
|
33 |
+
def test_warm_start_from_file(self):
|
34 |
+
"""Test that we can warm start in another process."""
|
35 |
+
if platform.system() == "Windows":
|
36 |
+
self.skipTest("Warm start test incompatible with Windows")
|
37 |
+
|
38 |
+
with tempfile.TemporaryDirectory() as tmpdirname:
|
39 |
+
model = PySRRegressor(
|
40 |
+
**self.default_test_kwargs,
|
41 |
+
unary_operators=["cos"],
|
42 |
+
)
|
43 |
+
model.warm_start = True
|
44 |
+
model.temp_equation_file = False
|
45 |
+
model.equation_file = Path(tmpdirname) / "equations.csv"
|
46 |
+
model.deterministic = True
|
47 |
+
model.multithreading = False
|
48 |
+
model.random_state = 0
|
49 |
+
model.procs = 0
|
50 |
+
model.early_stop_condition = 1e-10
|
51 |
+
|
52 |
+
rstate = np.random.RandomState(0)
|
53 |
+
X = rstate.randn(100, 2)
|
54 |
+
y = np.cos(X[:, 0]) ** 2
|
55 |
+
model.fit(X, y)
|
56 |
+
|
57 |
+
best_loss = model.equations_.iloc[-1]["loss"]
|
58 |
+
|
59 |
+
# Save X and y to a file:
|
60 |
+
X_file = Path(tmpdirname) / "X.npy"
|
61 |
+
y_file = Path(tmpdirname) / "y.npy"
|
62 |
+
np.save(X_file, X)
|
63 |
+
np.save(y_file, y)
|
64 |
+
# Now, create a new process and warm start from the file:
|
65 |
+
result = subprocess.run(
|
66 |
+
[
|
67 |
+
sys.executable,
|
68 |
+
"-c",
|
69 |
+
textwrap.dedent(
|
70 |
+
f"""
|
71 |
+
from pysr import PySRRegressor
|
72 |
+
import numpy as np
|
73 |
+
|
74 |
+
X = np.load("{X_file}")
|
75 |
+
y = np.load("{y_file}")
|
76 |
+
|
77 |
+
print("Loading model from file")
|
78 |
+
model = PySRRegressor.from_file("{model.equation_file}")
|
79 |
+
|
80 |
+
assert model.julia_state_ is not None
|
81 |
+
|
82 |
+
# Reset saved equations; should be loaded from state!
|
83 |
+
model.equations_ = None
|
84 |
+
model.equation_file_contents_ = None
|
85 |
+
|
86 |
+
model.warm_start = True
|
87 |
+
model.niterations = 0
|
88 |
+
model.max_evals = 0
|
89 |
+
model.ncycles_per_iteration = 0
|
90 |
+
|
91 |
+
model.fit(X, y)
|
92 |
+
|
93 |
+
best_loss = model.equations_.iloc[-1]["loss"]
|
94 |
+
|
95 |
+
assert best_loss <= {best_loss}
|
96 |
+
"""
|
97 |
+
),
|
98 |
+
],
|
99 |
+
stdout=subprocess.PIPE,
|
100 |
+
stderr=subprocess.PIPE,
|
101 |
+
env=os.environ,
|
102 |
+
)
|
103 |
+
self.assertEqual(result.returncode, 0)
|
104 |
+
self.assertIn("Loading model from file", result.stdout.decode())
|
105 |
+
self.assertIn("Started!", result.stderr.decode())
|
106 |
+
|
107 |
+
def test_bad_startup_options(self):
|
108 |
+
warning_tests = [
|
109 |
+
dict(
|
110 |
+
code='import os; os.environ["PYTHON_JULIACALL_HANDLE_SIGNALS"] = "no"; import pysr',
|
111 |
+
msg="PYTHON_JULIACALL_HANDLE_SIGNALS environment variable is set",
|
112 |
+
),
|
113 |
+
dict(
|
114 |
+
code='import os; os.environ["PYTHON_JULIACALL_THREADS"] = "1"; import pysr',
|
115 |
+
msg="PYTHON_JULIACALL_THREADS environment variable is set",
|
116 |
+
),
|
117 |
+
dict(
|
118 |
+
code="import juliacall; import pysr",
|
119 |
+
msg="juliacall module already imported.",
|
120 |
+
),
|
121 |
+
dict(
|
122 |
+
code='import os; os.environ["PYSR_AUTOLOAD_EXTENSIONS"] = "foo"; import pysr',
|
123 |
+
msg="PYSR_AUTOLOAD_EXTENSIONS environment variable is set",
|
124 |
+
),
|
125 |
+
]
|
126 |
+
for warning_test in warning_tests:
|
127 |
+
result = subprocess.run(
|
128 |
+
[sys.executable, "-c", warning_test["code"]],
|
129 |
+
stdout=subprocess.PIPE,
|
130 |
+
stderr=subprocess.PIPE,
|
131 |
+
env=os.environ,
|
132 |
+
)
|
133 |
+
self.assertIn(warning_test["msg"], result.stderr.decode())
|
134 |
+
|
135 |
+
def test_notebook(self):
|
136 |
+
if jl_version < (1, 9, 0):
|
137 |
+
self.skipTest("Julia version too old")
|
138 |
+
if platform.system() == "Windows":
|
139 |
+
self.skipTest("Notebook test incompatible with Windows")
|
140 |
+
result = subprocess.run(
|
141 |
+
[
|
142 |
+
sys.executable,
|
143 |
+
"-m",
|
144 |
+
"pytest",
|
145 |
+
"--nbval",
|
146 |
+
str(Path(__file__).parent / "test_nb.ipynb"),
|
147 |
+
"--nbval-sanitize-with",
|
148 |
+
str(Path(__file__).parent / "nb_sanitize.cfg"),
|
149 |
+
],
|
150 |
+
env=os.environ,
|
151 |
+
)
|
152 |
+
self.assertEqual(result.returncode, 0)
|
153 |
+
|
154 |
+
|
155 |
+
def runtests(just_tests=False):
|
156 |
+
tests = [TestStartup]
|
157 |
+
if just_tests:
|
158 |
+
return tests
|
159 |
+
suite = unittest.TestSuite()
|
160 |
+
loader = unittest.TestLoader()
|
161 |
+
for test in tests:
|
162 |
+
suite.addTests(loader.loadTestsFromTestCase(test))
|
163 |
+
runner = unittest.TextTestRunner()
|
164 |
+
return runner.run(suite)
|
pysr/test/test_torch.py
CHANGED
@@ -1,4 +1,3 @@
|
|
1 |
-
import platform
|
2 |
import unittest
|
3 |
|
4 |
import numpy as np
|
@@ -7,42 +6,28 @@ import sympy
|
|
7 |
|
8 |
from .. import PySRRegressor, sympy2torch
|
9 |
|
10 |
-
# Need to initialize Julia before importing torch...
|
11 |
-
|
12 |
-
|
13 |
-
def _import_torch():
|
14 |
-
if platform.system() == "Darwin":
|
15 |
-
# Import PyJulia, then Torch
|
16 |
-
from ..julia_helpers import init_julia
|
17 |
-
|
18 |
-
init_julia()
|
19 |
-
|
20 |
-
import torch
|
21 |
-
else:
|
22 |
-
# Import Torch, then PyJulia
|
23 |
-
# https://github.com/pytorch/pytorch/issues/78829
|
24 |
-
import torch
|
25 |
-
return torch
|
26 |
-
|
27 |
|
28 |
class TestTorch(unittest.TestCase):
|
29 |
def setUp(self):
|
30 |
np.random.seed(0)
|
31 |
|
|
|
|
|
|
|
|
|
|
|
32 |
def test_sympy2torch(self):
|
33 |
-
torch = _import_torch()
|
34 |
x, y, z = sympy.symbols("x y z")
|
35 |
cosx = 1.0 * sympy.cos(x) + y
|
36 |
|
37 |
-
X = torch.tensor(np.random.randn(1000, 3))
|
38 |
-
true = 1.0 * torch.cos(X[:, 0]) + X[:, 1]
|
39 |
torch_module = sympy2torch(cosx, [x, y, z])
|
40 |
self.assertTrue(
|
41 |
np.all(np.isclose(torch_module(X).detach().numpy(), true.detach().numpy()))
|
42 |
)
|
43 |
|
44 |
def test_pipeline_pandas(self):
|
45 |
-
torch = _import_torch()
|
46 |
X = pd.DataFrame(np.random.randn(100, 10))
|
47 |
y = np.ones(X.shape[0])
|
48 |
model = PySRRegressor(
|
@@ -71,13 +56,12 @@ class TestTorch(unittest.TestCase):
|
|
71 |
self.assertEqual(str(tformat), "_SingleSymPyModule(expression=cos(x1)**2)")
|
72 |
|
73 |
np.testing.assert_almost_equal(
|
74 |
-
tformat(torch.tensor(X.values)).detach().numpy(),
|
75 |
np.square(np.cos(X.values[:, 1])), # Selection 1st feature
|
76 |
decimal=3,
|
77 |
)
|
78 |
|
79 |
def test_pipeline(self):
|
80 |
-
torch = _import_torch()
|
81 |
X = np.random.randn(100, 10)
|
82 |
y = np.ones(X.shape[0])
|
83 |
model = PySRRegressor(
|
@@ -106,22 +90,22 @@ class TestTorch(unittest.TestCase):
|
|
106 |
self.assertEqual(str(tformat), "_SingleSymPyModule(expression=cos(x1)**2)")
|
107 |
|
108 |
np.testing.assert_almost_equal(
|
109 |
-
tformat(torch.tensor(X)).detach().numpy(),
|
110 |
np.square(np.cos(X[:, 1])), # 2nd feature
|
111 |
decimal=3,
|
112 |
)
|
113 |
|
114 |
def test_mod_mapping(self):
|
115 |
-
torch = _import_torch()
|
116 |
x, y, z = sympy.symbols("x y z")
|
117 |
expression = x**2 + sympy.atanh(sympy.Mod(y + 1, 2) - 1) * 3.2 * z
|
118 |
|
119 |
module = sympy2torch(expression, [x, y, z])
|
120 |
|
121 |
-
X = torch.rand(100, 3).float() * 10
|
122 |
|
123 |
true_out = (
|
124 |
-
X[:, 0] ** 2
|
|
|
125 |
)
|
126 |
torch_out = module(X)
|
127 |
|
@@ -130,7 +114,6 @@ class TestTorch(unittest.TestCase):
|
|
130 |
)
|
131 |
|
132 |
def test_custom_operator(self):
|
133 |
-
torch = _import_torch()
|
134 |
X = np.random.randn(100, 3)
|
135 |
y = np.ones(X.shape[0])
|
136 |
model = PySRRegressor(
|
@@ -156,7 +139,7 @@ class TestTorch(unittest.TestCase):
|
|
156 |
model.set_params(
|
157 |
equation_file="equation_file_custom_operator.csv",
|
158 |
extra_sympy_mappings={"mycustomoperator": sympy.sin},
|
159 |
-
extra_torch_mappings={"mycustomoperator": torch.sin},
|
160 |
)
|
161 |
model.refresh(checkpoint_file="equation_file_custom_operator.csv")
|
162 |
self.assertEqual(str(model.sympy()), "sin(x1)")
|
@@ -165,13 +148,12 @@ class TestTorch(unittest.TestCase):
|
|
165 |
tformat = model.pytorch()
|
166 |
self.assertEqual(str(tformat), "_SingleSymPyModule(expression=sin(x1))")
|
167 |
np.testing.assert_almost_equal(
|
168 |
-
tformat(torch.tensor(X)).detach().numpy(),
|
169 |
np.sin(X[:, 1]),
|
170 |
decimal=3,
|
171 |
)
|
172 |
|
173 |
def test_feature_selection_custom_operators(self):
|
174 |
-
torch = _import_torch()
|
175 |
rstate = np.random.RandomState(0)
|
176 |
X = pd.DataFrame({f"k{i}": rstate.randn(2000) for i in range(10, 21)})
|
177 |
cos_approx = lambda x: 1 - (x**2) / 2 + (x**4) / 24 + (x**6) / 720
|
@@ -196,16 +178,20 @@ class TestTorch(unittest.TestCase):
|
|
196 |
|
197 |
np_output = model.predict(X.values)
|
198 |
|
199 |
-
torch_output = torch_module(torch.tensor(X.values)).detach().numpy()
|
200 |
|
201 |
np.testing.assert_almost_equal(y.values, np_output, decimal=3)
|
202 |
np.testing.assert_almost_equal(y.values, torch_output, decimal=3)
|
203 |
|
204 |
|
205 |
-
def runtests():
|
206 |
"""Run all tests in test_torch.py."""
|
|
|
|
|
|
|
207 |
loader = unittest.TestLoader()
|
208 |
suite = unittest.TestSuite()
|
209 |
-
|
|
|
210 |
runner = unittest.TextTestRunner()
|
211 |
return runner.run(suite)
|
|
|
|
|
1 |
import unittest
|
2 |
|
3 |
import numpy as np
|
|
|
6 |
|
7 |
from .. import PySRRegressor, sympy2torch
|
8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
|
10 |
class TestTorch(unittest.TestCase):
|
11 |
def setUp(self):
|
12 |
np.random.seed(0)
|
13 |
|
14 |
+
# Need to import after juliacall:
|
15 |
+
import torch
|
16 |
+
|
17 |
+
self.torch = torch
|
18 |
+
|
19 |
def test_sympy2torch(self):
|
|
|
20 |
x, y, z = sympy.symbols("x y z")
|
21 |
cosx = 1.0 * sympy.cos(x) + y
|
22 |
|
23 |
+
X = self.torch.tensor(np.random.randn(1000, 3))
|
24 |
+
true = 1.0 * self.torch.cos(X[:, 0]) + X[:, 1]
|
25 |
torch_module = sympy2torch(cosx, [x, y, z])
|
26 |
self.assertTrue(
|
27 |
np.all(np.isclose(torch_module(X).detach().numpy(), true.detach().numpy()))
|
28 |
)
|
29 |
|
30 |
def test_pipeline_pandas(self):
|
|
|
31 |
X = pd.DataFrame(np.random.randn(100, 10))
|
32 |
y = np.ones(X.shape[0])
|
33 |
model = PySRRegressor(
|
|
|
56 |
self.assertEqual(str(tformat), "_SingleSymPyModule(expression=cos(x1)**2)")
|
57 |
|
58 |
np.testing.assert_almost_equal(
|
59 |
+
tformat(self.torch.tensor(X.values)).detach().numpy(),
|
60 |
np.square(np.cos(X.values[:, 1])), # Selection 1st feature
|
61 |
decimal=3,
|
62 |
)
|
63 |
|
64 |
def test_pipeline(self):
|
|
|
65 |
X = np.random.randn(100, 10)
|
66 |
y = np.ones(X.shape[0])
|
67 |
model = PySRRegressor(
|
|
|
90 |
self.assertEqual(str(tformat), "_SingleSymPyModule(expression=cos(x1)**2)")
|
91 |
|
92 |
np.testing.assert_almost_equal(
|
93 |
+
tformat(self.torch.tensor(X)).detach().numpy(),
|
94 |
np.square(np.cos(X[:, 1])), # 2nd feature
|
95 |
decimal=3,
|
96 |
)
|
97 |
|
98 |
def test_mod_mapping(self):
|
|
|
99 |
x, y, z = sympy.symbols("x y z")
|
100 |
expression = x**2 + sympy.atanh(sympy.Mod(y + 1, 2) - 1) * 3.2 * z
|
101 |
|
102 |
module = sympy2torch(expression, [x, y, z])
|
103 |
|
104 |
+
X = self.torch.rand(100, 3).float() * 10
|
105 |
|
106 |
true_out = (
|
107 |
+
X[:, 0] ** 2
|
108 |
+
+ self.torch.atanh(self.torch.fmod(X[:, 1] + 1, 2) - 1) * 3.2 * X[:, 2]
|
109 |
)
|
110 |
torch_out = module(X)
|
111 |
|
|
|
114 |
)
|
115 |
|
116 |
def test_custom_operator(self):
|
|
|
117 |
X = np.random.randn(100, 3)
|
118 |
y = np.ones(X.shape[0])
|
119 |
model = PySRRegressor(
|
|
|
139 |
model.set_params(
|
140 |
equation_file="equation_file_custom_operator.csv",
|
141 |
extra_sympy_mappings={"mycustomoperator": sympy.sin},
|
142 |
+
extra_torch_mappings={"mycustomoperator": self.torch.sin},
|
143 |
)
|
144 |
model.refresh(checkpoint_file="equation_file_custom_operator.csv")
|
145 |
self.assertEqual(str(model.sympy()), "sin(x1)")
|
|
|
148 |
tformat = model.pytorch()
|
149 |
self.assertEqual(str(tformat), "_SingleSymPyModule(expression=sin(x1))")
|
150 |
np.testing.assert_almost_equal(
|
151 |
+
tformat(self.torch.tensor(X)).detach().numpy(),
|
152 |
np.sin(X[:, 1]),
|
153 |
decimal=3,
|
154 |
)
|
155 |
|
156 |
def test_feature_selection_custom_operators(self):
|
|
|
157 |
rstate = np.random.RandomState(0)
|
158 |
X = pd.DataFrame({f"k{i}": rstate.randn(2000) for i in range(10, 21)})
|
159 |
cos_approx = lambda x: 1 - (x**2) / 2 + (x**4) / 24 + (x**6) / 720
|
|
|
178 |
|
179 |
np_output = model.predict(X.values)
|
180 |
|
181 |
+
torch_output = torch_module(self.torch.tensor(X.values)).detach().numpy()
|
182 |
|
183 |
np.testing.assert_almost_equal(y.values, np_output, decimal=3)
|
184 |
np.testing.assert_almost_equal(y.values, torch_output, decimal=3)
|
185 |
|
186 |
|
187 |
+
def runtests(just_tests=False):
|
188 |
"""Run all tests in test_torch.py."""
|
189 |
+
tests = [TestTorch]
|
190 |
+
if just_tests:
|
191 |
+
return tests
|
192 |
loader = unittest.TestLoader()
|
193 |
suite = unittest.TestSuite()
|
194 |
+
for test in tests:
|
195 |
+
suite.addTests(loader.loadTestsFromTestCase(test))
|
196 |
runner = unittest.TextTestRunner()
|
197 |
return runner.run(suite)
|
pysr/version.py
DELETED
@@ -1,2 +0,0 @@
|
|
1 |
-
__version__ = "0.16.9"
|
2 |
-
__symbolic_regression_jl_version__ = "0.23.1"
|
|
|
|
|
|