MilesCranmer committed on
Commit
5908dc9
·
1 Parent(s): 0557713

Add sympy and score as output

Browse files
Files changed (3) hide show
  1. README.md +8 -1
  2. pysr/sr.py +71 -2
  3. setup.py +2 -1
README.md CHANGED
@@ -44,7 +44,7 @@ Then, at the command line,
44
  install the `Optim` and `SpecialFunctions` packages via:
45
  `julia -e 'import Pkg; Pkg.add("Optim"); Pkg.add("SpecialFunctions")'`.
46
 
47
- For python, you need to have Python 3, numpy, and pandas installed.
48
 
49
  You can install this package from PyPI with:
50
 
@@ -81,6 +81,12 @@ which gives:
81
  2 11 0.000000 plus(plus(mult(x0, x0), cos(x3)), plus(-2.0, cos(x3)))
82
  ```
83
 
 
 
 
 
 
 
84
  ### Custom operators
85
 
86
  One can define custom operators in Julia by passing a string:
@@ -309,4 +315,5 @@ pd.DataFrame, Results dataframe, giving complexity, MSE, and equations
309
  - Maybe I could store the result of calculations in a tree (or an index to a massive array that does this). And only when something in the subtree updates, does the rest of the tree update!
310
  - [ ] Try Memoize.jl instead of manually caching.
311
  - [ ] Try threading over population. Do random sort, compute mutation for each, then replace 10% oldest.
 
312
 
 
44
  install the `Optim` and `SpecialFunctions` packages via:
45
  `julia -e 'import Pkg; Pkg.add("Optim"); Pkg.add("SpecialFunctions")'`.
46
 
47
+ For python, you need to have Python 3, numpy, sympy, and pandas installed.
48
 
49
  You can install this package from PyPI with:
50
 
 
81
  2 11 0.000000 plus(plus(mult(x0, x0), cos(x3)), plus(-2.0, cos(x3)))
82
  ```
83
 
84
+ The newest version of PySR also returns three additional columns:
85
+
86
+ - `score` - a metric akin to Occam's razor; you should use this to help select the "true" equation.
87
+ - `sympy_format` - the equation as a sympy expression.
88
+ - `lambda_format` - a lambda function for that equation, that you can pass values through.
89
+
90
  ### Custom operators
91
 
92
  One can define custom operators in Julia by passing a string:
 
315
  - Maybe I could store the result of calculations in a tree (or an index to a massive array that does this). And only when something in the subtree updates, does the rest of the tree update!
316
  - [ ] Try Memoize.jl instead of manually caching.
317
  - [ ] Try threading over population. Do random sort, compute mutation for each, then replace 10% oldest.
318
+ - [ ] Call function to read from csv after running
319
 
pysr/sr.py CHANGED
@@ -4,6 +4,41 @@ from collections import namedtuple
4
  import pathlib
5
  import numpy as np
6
  import pandas as pd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
  def pysr(X=None, y=None, weights=None,
9
  procs=4,
@@ -33,6 +68,7 @@ def pysr(X=None, y=None, weights=None,
33
  perturbationFactor=1.0,
34
  nrestarts=3,
35
  timeout=None,
 
36
  equation_file='hall_of_fame.csv',
37
  test='simple1',
38
  verbosity=1e9,
@@ -112,6 +148,11 @@ def pysr(X=None, y=None, weights=None,
112
  if populations is None:
113
  populations = procs
114
 
 
 
 
 
 
115
  rand_string = f'{"".join([str(np.random.rand())[2] for i in range(20)])}'
116
 
117
  if isinstance(binary_operators, str): binary_operators = [binary_operators]
@@ -225,6 +266,34 @@ const weights = convert(Array{Float32, 1}, """f"{weight_str})"
225
  output = pd.read_csv(equation_file, sep="|")
226
  except FileNotFoundError:
227
  print("Couldn't find equation file!")
228
- output = pd.DataFrame()
229
- return output
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
230
 
 
4
  import pathlib
5
  import numpy as np
6
  import pandas as pd
7
+ import sympy
8
+ from sympy import sympify, Symbol, lambdify
9
+
10
# Lookup table from operator names, as they appear in the 'Equation'
# strings read back from the equation file, to callables that build the
# corresponding sympy expression. It is merged into the `locals` mapping
# handed to `sympify`, so every value must be callable with sympy objects.
#
# Only attributes that sympy actually exposes are used here: sympy has no
# `ceil`, `round`, `log10` or `log2` functions, so those operators are
# expressed through `ceiling`, `floor` and the two-argument form of `log`.
sympy_mappings = {
    # Binary arithmetic.
    'div': lambda x, y: x / y,
    'mult': lambda x, y: x * y,
    'plus': lambda x, y: x + y,
    'neg': lambda x: -x,
    # Sign-preserving power: the result keeps the sign of the base.
    'pow': lambda x, y: sympy.sign(x) * sympy.Abs(x)**y,
    # Trigonometric and hyperbolic functions.
    'cos': lambda x: sympy.cos(x),
    'sin': lambda x: sympy.sin(x),
    'tan': lambda x: sympy.tan(x),
    'cosh': lambda x: sympy.cosh(x),
    'sinh': lambda x: sympy.sinh(x),
    'tanh': lambda x: sympy.tanh(x),
    'exp': lambda x: sympy.exp(x),
    'acos': lambda x: sympy.acos(x),
    'asin': lambda x: sympy.asin(x),
    'atan': lambda x: sympy.atan(x),
    'acosh': lambda x: sympy.acosh(x),
    'asinh': lambda x: sympy.asinh(x),
    'atanh': lambda x: sympy.atanh(x),
    'abs': lambda x: sympy.Abs(x),
    'mod': lambda x, y: sympy.Mod(x, y),
    'erf': lambda x: sympy.erf(x),
    'erfc': lambda x: sympy.erfc(x),
    # Logarithms of the magnitude; the base is given as log's second argument.
    'logm': lambda x: sympy.log(sympy.Abs(x)),
    'logm10': lambda x: sympy.log(sympy.Abs(x), 10),
    'logm2': lambda x: sympy.log(sympy.Abs(x), 2),
    'log1p': lambda x: sympy.log(x + 1),
    # Rounding family.
    'floor': lambda x: sympy.floor(x),
    'ceil': lambda x: sympy.ceiling(x),
    'sign': lambda x: sympy.sign(x),
    # Round-to-nearest built from floor; exact .5 ties round upward.
    # NOTE(review): the backend's tie-breaking rule may differ - confirm.
    'round': lambda x: sympy.floor(x + sympy.Rational(1, 2)),
}
42
 
43
  def pysr(X=None, y=None, weights=None,
44
  procs=4,
 
68
  perturbationFactor=1.0,
69
  nrestarts=3,
70
  timeout=None,
71
+ extra_sympy_mappings={},
72
  equation_file='hall_of_fame.csv',
73
  test='simple1',
74
  verbosity=1e9,
 
148
  if populations is None:
149
  populations = procs
150
 
151
+ local_sympy_mappings = {
152
+ **extra_sympy_mappings,
153
+ **sympy_mappings
154
+ }
155
+
156
  rand_string = f'{"".join([str(np.random.rand())[2] for i in range(20)])}'
157
 
158
  if isinstance(binary_operators, str): binary_operators = [binary_operators]
 
266
  output = pd.read_csv(equation_file, sep="|")
267
  except FileNotFoundError:
268
  print("Couldn't find equation file!")
269
+ return pd.DataFrame()
270
+
271
+ scores = []
272
+ lastMSE = None
273
+ lastComplexity = 0
274
+ sympy_format = []
275
+ lambda_format = []
276
+ sympy_symbols = [sympy.Symbol('x%d'%i) for i in range(X.shape[1])]
277
+ for i in range(len(output)):
278
+ eqn = sympify(output.loc[i, 'Equation'], locals=local_sympy_mappings)
279
+ sympy_format.append(eqn)
280
+ lambda_format.append(lambdify(sympy_symbols, eqn))
281
+ curMSE = output.loc[i, 'MSE']
282
+ curComplexity = output.loc[i, 'Complexity']
283
+
284
+ if lastMSE is None:
285
+ cur_score = 0.0
286
+ else:
287
+ cur_score = np.log(curMSE/lastMSE)/(curComplexity - lastComplexity)
288
+
289
+ scores.append(cur_score)
290
+ lastMSE = curMSE
291
+ lastComplexity = curComplexity
292
+
293
+
294
+ output['score'] = np.array(scores)
295
+ output['sympy_format'] = sympy_format
296
+ output['lambda_format'] = lambda_format
297
+ return output[['Complexity', 'MSE', 'score', 'Equation', 'sympy_format', 'lambda_format']]
298
+
299
 
setup.py CHANGED
@@ -14,7 +14,8 @@ setuptools.setup(
14
  url="https://github.com/MilesCranmer/pysr",
15
  install_requires=[
16
  "numpy",
17
- "pandas"
 
18
  ],
19
  packages=setuptools.find_packages(),
20
  package_data={
 
14
  url="https://github.com/MilesCranmer/pysr",
15
  install_requires=[
16
  "numpy",
17
+ "pandas",
18
+ "sympy"
19
  ],
20
  packages=setuptools.find_packages(),
21
  package_data={