MilesCranmer committed
Commit 563d8eb · unverified · 2 parents: 3565353, 1e1bd80

Merge pull request #389 from MilesCranmer/backend-update-0.21.2

docs/examples.md CHANGED
@@ -433,9 +433,97 @@ equal to:
 $\frac{x_0^2 x_1 - 2.0000073}{x_2^2 - 1.0000019}$, which
 is nearly the same as the true equation!
 
-## 10. Additional features
+## 10. Dimensional constraints
+
+One other feature we can exploit is dimensional analysis.
+Say that we know the physical units of each feature and output,
+and we want to find an expression that is dimensionally consistent.
+
+We can do this by assigning units with `DynamicQuantities.jl`,
+passing a string that specifies the units of each variable.
+First, let's make some data on Newton's law of gravitation, using
+astropy for units:
+
+```python
+import numpy as np
+from astropy import units as u, constants as const
+
+M = (np.random.rand(100) + 0.1) * const.M_sun
+m = 100 * (np.random.rand(100) + 0.1) * u.kg
+r = (np.random.rand(100) + 0.1) * const.R_earth
+G = const.G
+
+F = G * M * m / r**2
+```
+
+We can see the units of `F` with `F.unit`.
+
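As a quick check on the generated data, astropy's standard `Quantity` API (`.unit`, `.si`, `.to`) can confirm that `F` really carries force units. A minimal sketch:

```python
print(F.unit)         # composite unit built from M_sun, kg, and R_earth
print(F.si[:3])       # same values, decomposed to SI (kg m / s^2)
print(F.to(u.N)[:3])  # or converted explicitly to Newtons
```
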
+Now, let's create our model.
+Since this data has such a large dynamic range,
+let's also create a custom loss function
+that looks at the error in log-space:
+
+```python
+loss = """function loss_fnc(prediction, target)
+    scatter_loss = abs(log((abs(prediction)+1e-20) / (abs(target)+1e-20)))
+    sign_loss = 10 * (sign(prediction) - sign(target))^2
+    return scatter_loss + sign_loss
+end
+"""
+```
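To see what this loss measures, here is the same computation written in plain numpy, purely for intuition (the real loss above is a Julia function run on the backend):

```python
import numpy as np

def log_space_loss(prediction, target):
    # Ratio error in log-space: a 10x overshoot costs the same
    # as a 10x undershoot, which suits a large dynamic range.
    scatter_loss = np.abs(np.log((np.abs(prediction) + 1e-20) / (np.abs(target) + 1e-20)))
    # Heavy penalty whenever the predicted sign is wrong:
    sign_loss = 10 * (np.sign(prediction) - np.sign(target)) ** 2
    return scatter_loss + sign_loss

print(log_space_loss(np.array([1e3, -1.0]), np.array([1e2, 1.0])))  # ~[2.3, 40.0]
```
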
+
+Now let's define our model:
+
+```python
+model = PySRRegressor(
+    binary_operators=["+", "-", "*", "/"],
+    unary_operators=["square"],
+    loss=loss,
+    complexity_of_constants=2,
+    maxsize=25,
+    niterations=100,
+    populations=50,
+    # Amount to penalize dimensional violations:
+    dimensional_constraint_penalty=10**5,
+)
+```
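The penalty is additive: per the docstring added in `pysr/sr.py` below, a candidate whose units fail to resolve keeps its data loss but pays a flat surcharge (default `1000.0`; raised to `10**5` here). Conceptually, as a sketch rather than PySR's internal code:

```python
# Dimensionally invalid candidates are not discarded,
# just made very unattractive to selection (hypothetical helper):
def effective_loss(data_loss, dimensionally_valid, penalty=10**5):
    return data_loss if dimensionally_valid else data_loss + penalty
```
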
+
+and fit it, passing the unit information.
+To do this, we need to use the format of [DynamicQuantities.jl](https://symbolicml.org/DynamicQuantities.jl/dev/#Usage).
+
+```python
+# Get numerical arrays to fit:
+X = pd.DataFrame(dict(
+    M=M.value,
+    m=m.value,
+    r=r.value,
+))
+y = F.value
+
+model.fit(
+    X,
+    y,
+    X_units=["Constants.M_sun", "kg", "Constants.R_earth"],
+    y_units="kg * m / s^2"
+)
+```
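Once the search finishes, the hall of fame can be inspected as usual. A sketch using the standard `PySRRegressor` accessors (`equations_` and `get_best`):

```python
# One row per complexity level:
print(model.equations_[["complexity", "loss", "equation"]])

# The best expression under the current model_selection strategy:
best = model.get_best()
print(best["equation"])
```
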
+
+You can observe that all expressions with a loss under
+our penalty are dimensionally consistent!
+(The `"[⋅]"` indicates free units in a constant, which can cancel out other units in the expression.)
+For example,
+
+```julia
+"y[m s⁻² kg] = (M[kg] * 2.6353e-22[⋅])"
+```
+
+would indicate that the expression is dimensionally consistent, with
+a constant `"2.6353e-22[m s⁻²]"`.
+
+Note that the constants in this expression span a large dynamic range, so it may be difficult to find; consider searching with a larger `niterations` if needed.
+
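As a sanity check on the printed units, we can use astropy (rather than the backend's DynamicQuantities.jl) to confirm that `m s⁻²` is exactly what dimensional analysis demands of the free constant here:

```python
# y has force units (kg m / s^2); dividing out the mass feature
# leaves m / s^2 for the free constant to absorb:
implied = (u.kg * u.m / u.s**2) / u.kg
print(implied)  # m / s2
```
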
+## 11. Additional features
 
 For the many other features available in PySR, please
 read the [Options section](options.md).
docs/gen_param_docs.py CHANGED
@@ -53,7 +53,7 @@ def str_param_groups(param_groupings, params, cur_heading=2):
 if __name__ == "__main__":
     # This is the path to the param_groupings.yml file
     # relative to the current file.
-    path = "param_groupings.yml"
+    path = "../pysr/param_groupings.yml"
     with open(path, "r") as f:
         param_groupings = safe_load(f)
 
pysr/julia_helpers.py CHANGED
@@ -259,6 +259,7 @@ def init_julia(julia_project=None, quiet=False, julia_kwargs=None, return_aux=Fa
 
 def _add_sr_to_julia_project(Main, io_arg):
     Main.eval("using Pkg")
+    Main.eval("Pkg.Registry.update()")
     Main.sr_spec = Main.PackageSpec(
         name="SymbolicRegression",
         url="https://github.com/MilesCranmer/SymbolicRegression.jl",
@@ -266,8 +267,7 @@ def _add_sr_to_julia_project(Main, io_arg):
     )
     Main.clustermanagers_spec = Main.PackageSpec(
         name="ClusterManagers",
-        url="https://github.com/JuliaParallel/ClusterManagers.jl",
-        rev="14e7302f068794099344d5d93f71979aaf4fbeb3",
+        version="0.4",
     )
     Main.eval(f"Pkg.add([sr_spec, clustermanagers_spec], {io_arg})")
{docs → pysr}/param_groupings.yml RENAMED
@@ -13,6 +13,7 @@
   - loss
   - full_objective
   - model_selection
+  - dimensional_constraint_penalty
   - Working with Complexities:
   - parsimony
   - constraints
@@ -72,12 +73,14 @@
   - fast_cycle
   - turbo
   - enable_autodiff
+  - Determinism:
   - random_state
   - deterministic
   - warm_start
   - Monitoring:
   - verbosity
   - update_verbosity
+  - print_precision
   - progress
   - Environment:
   - temp_equation_file
pysr/sr.py CHANGED
@@ -167,6 +167,8 @@ def _check_assertions(
     variable_names,
     weights,
     y,
+    X_units,
+    y_units,
 ):
     # Check for potential errors before they happen
     assert len(X.shape) == 2
@@ -184,12 +186,30 @@ def _check_assertions(
                 f"Variable name {var_name} is already a function name."
             )
         # Check if alphanumeric only:
-        if not re.match(r"^[a-zA-Z0-9_]+$", var_name):
+        if not re.match(r"^[₀₁₂₃₄₅₆₇₈₉a-zA-Z0-9_]+$", var_name):
             raise ValueError(
                 f"Invalid variable name {var_name}. "
                 "Only alphanumeric characters, numbers, "
                 "and underscores are allowed."
             )
+    if X_units is not None and len(X_units) != X.shape[1]:
+        raise ValueError(
+            "The number of units in `X_units` must equal the number of features in `X`."
+        )
+    if y_units is not None:
+        good_y_units = False
+        if isinstance(y_units, list):
+            if len(y.shape) == 1:
+                good_y_units = len(y_units) == 1
+            else:
+                good_y_units = len(y_units) == y.shape[1]
+        else:
+            good_y_units = len(y.shape) == 1 or y.shape[1] == 1
+
+        if not good_y_units:
+            raise ValueError(
+                "The number of units in `y_units` must equal the number of output features in `y`."
+            )
 
 
  def best(*args, **kwargs): # pragma: no cover
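The `y_units` shape rule above is compact but easy to misread. Here is the same logic as a standalone sketch (hypothetical helper, mirroring the cases exercised in `test_unit_checks` further down): a list of units must match the number of outputs, while a bare string is only valid for single-output `y`.

```python
import numpy as np

def y_units_ok(y, y_units):
    # Sketch of the validation rule above, not PySR's API.
    if isinstance(y_units, list):
        n_out = 1 if y.ndim == 1 else y.shape[1]
        return len(y_units) == n_out
    return y.ndim == 1 or y.shape[1] == 1

assert y_units_ok(np.ones(10), "m")               # bare string, 1D y
assert y_units_ok(np.ones((10, 2)), ["m/s", ""])  # list matches 2 outputs
assert not y_units_ok(np.ones((10, 2)), ["m"])    # list too short
```
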
 
@@ -354,6 +374,9 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         You may pass a function with the same arguments as this (note
         that the name of the function doesn't matter). Here,
         both `prediction` and `dataset.y` are 1D arrays of length `dataset.n`.
+        If using `batching`, then you should add an
+        `idx` argument to the function, which is `nothing`
+        for non-batched, and a 1D array of indices for batched.
         Default is `None`.
     complexity_of_operators : dict[str, float]
         If you would like to use a complexity other than 1 for an
@@ -371,6 +394,9 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
     parsimony : float
         Multiplicative factor for how much to punish complexity.
         Default is `0.0032`.
+    dimensional_constraint_penalty : float
+        Additive penalty applied if dimensional analysis of an expression fails.
+        By default, this is `1000.0`.
     use_frequency : bool
         Whether to measure the frequency of complexities, and use that
         instead of parsimony to explore equation space. Will naturally
@@ -551,6 +577,8 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         What verbosity level to use for package updates.
         Will take value of `verbosity` if not given.
         Default is `None`.
+    print_precision : int
+        How many significant digits to print for floats. Default is `5`.
     progress : bool
         Whether to use a progress bar instead of printing to stdout.
         Default is `True`.
@@ -633,6 +661,12 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
     feature_names_in_ : ndarray of shape (`n_features_in_`,)
         Names of features seen during :term:`fit`. Defined only when `X`
         has feature names that are all strings.
+    pretty_feature_names_in_ : ndarray of shape (`n_features_in_`,)
+        Pretty names of features, used only during printing.
+    X_units_ : list[str] of length n_features
+        Units of each variable in the training dataset, `X`.
+    y_units_ : str | list[str] of length n_out
+        Units of each variable in the training dataset, `y`.
     nout_ : int
         Number of output dimensions.
     selection_mask_ : list[int] of length `select_k_features`
@@ -712,6 +746,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         complexity_of_constants=1,
         complexity_of_variables=1,
         parsimony=0.0032,
+        dimensional_constraint_penalty=None,
         use_frequency=True,
         use_frequency_in_tournament=True,
         adaptive_parsimony_scaling=20.0,
@@ -758,6 +793,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         warm_start=False,
         verbosity=1e9,
         update_verbosity=None,
+        print_precision=5,
         progress=True,
         equation_file=None,
         temp_equation_file=False,
@@ -802,6 +838,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         self.complexity_of_constants = complexity_of_constants
         self.complexity_of_variables = complexity_of_variables
         self.parsimony = parsimony
+        self.dimensional_constraint_penalty = dimensional_constraint_penalty
         self.use_frequency = use_frequency
         self.use_frequency_in_tournament = use_frequency_in_tournament
         self.adaptive_parsimony_scaling = adaptive_parsimony_scaling
@@ -853,6 +890,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         # - Runtime user interface
         self.verbosity = verbosity
         self.update_verbosity = update_verbosity
+        self.print_precision = print_precision
         self.progress = progress
         # - Project management
         self.equation_file = equation_file
@@ -976,11 +1014,10 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
 
         # Else, we re-create it.
         print(
-            f"{equation_file} does not exist, "
+            f"{pkl_filename} does not exist, "
             "so we must create the model from scratch."
         )
-        assert binary_operators is not None
-        assert unary_operators is not None
+        assert binary_operators is not None or unary_operators is not None
         assert n_features_in is not None
 
         # TODO: copy .bkup file if exists.
@@ -995,10 +1032,14 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         model.n_features_in_ = n_features_in
 
         if feature_names_in is None:
-            model.feature_names_in_ = [f"x{i}" for i in range(n_features_in)]
+            model.feature_names_in_ = np.array([f"x{i}" for i in range(n_features_in)])
+            model.pretty_feature_names_in_ = np.array(
+                [f"x{_subscriptify(i)}" for i in range(n_features_in)]
+            )
         else:
             assert len(feature_names_in) == n_features_in
             model.feature_names_in_ = feature_names_in
+            model.pretty_feature_names_in_ = None
 
         if selection_mask is None:
             model.selection_mask_ = np.ones(n_features_in, dtype=bool)
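A small illustration of the plain vs. pretty naming scheme introduced here, as a sketch (`_subscriptify` is the helper added at the bottom of this file, repeated for self-containment):

```python
import numpy as np

def _subscriptify(i: int) -> str:
    # Same definition as added at the end of pysr/sr.py in this commit.
    return "".join([chr(0x2080 + int(c)) for c in str(i)])

feature_names = np.array([f"x{i}" for i in range(3)])
pretty_names = np.array([f"x{_subscriptify(i)}" for i in range(3)])
print(feature_names)  # ['x0' 'x1' 'x2']
print(pretty_names)   # ['x₀' 'x₁' 'x₂']
```
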
 
@@ -1318,7 +1359,9 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
 
         return packed_modified_params
 
-    def _validate_and_set_fit_params(self, X, y, Xresampled, weights, variable_names):
+    def _validate_and_set_fit_params(
+        self, X, y, Xresampled, weights, variable_names, X_units, y_units
+    ):
         """
         Validate the parameters passed to the :term`fit` method.
 
@@ -1340,6 +1383,10 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
             for that particular element of y.
         variable_names : list[str] of length n_features
             Names of each variable in the training dataset, `X`.
+        X_units : list[str] of length n_features
+            Units of each variable in the training dataset, `X`.
+        y_units : str | list[str] of length n_out
+            Units of each variable in the training dataset, `y`.
 
         Returns
         -------
@@ -1351,6 +1398,10 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
             Validated resampled training data used for denoising.
         variable_names_validated : list[str] of length n_features
             Validated list of variable names for each feature in `X`.
+        X_units : list[str] of length n_features
+            Validated units for `X`.
+        y_units : str | list[str] of length n_out
+            Validated units for `y`.
 
         """
         if isinstance(X, pd.DataFrame):
@@ -1361,7 +1412,10 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
                 "Using DataFrame column names instead."
             )
 
-        if X.columns.is_object() and X.columns.str.contains(" ").any():
+        if (
+            pd.api.types.is_object_dtype(X.columns)
+            and X.columns.str.contains(" ").any()
+        ):
             X.columns = X.columns.str.replace(" ", "_")
             warnings.warn(
                 "Spaces in DataFrame column names are not supported. "
@@ -1384,7 +1438,18 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
             weights = check_array(weights, ensure_2d=False)
             check_consistent_length(weights, y)
         X, y = self._validate_data(X=X, y=y, reset=True, multi_output=True)
-        self.feature_names_in_ = _check_feature_names_in(self, variable_names)
+        self.feature_names_in_ = _check_feature_names_in(
+            self, variable_names, generate_names=False
+        )
+
+        if self.feature_names_in_ is None:
+            self.feature_names_in_ = np.array([f"x{i}" for i in range(X.shape[1])])
+            self.pretty_feature_names_in_ = np.array(
+                [f"x{_subscriptify(i)}" for i in range(X.shape[1])]
+            )
+        else:
+            self.pretty_feature_names_in_ = None
+
         variable_names = self.feature_names_in_
 
         # Handle multioutput data
@@ -1395,10 +1460,13 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         else:
             raise NotImplementedError("y shape not supported!")
 
-        return X, y, Xresampled, weights, variable_names
+        self.X_units_ = copy.deepcopy(X_units)
+        self.y_units_ = copy.deepcopy(y_units)
+
+        return X, y, Xresampled, weights, variable_names, X_units, y_units
 
     def _pre_transform_training_data(
-        self, X, y, Xresampled, variable_names, random_state
+        self, X, y, Xresampled, variable_names, X_units, y_units, random_state
     ):
         """
         Transform the training data before fitting the symbolic regressor.
@@ -1418,6 +1486,10 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         variable_names : list[str]
             Names of each variable in the training dataset, `X`.
             Of length `n_features`.
+        X_units : list[str]
+            Units of each variable in the training dataset, `X`.
+        y_units : str | list[str]
+            Units of each variable in the training dataset, `y`.
         random_state : int | np.RandomState
             Pass an int for reproducible results across multiple function calls.
             See :term:`Glossary <random_state>`. Default is `None`.
@@ -1439,6 +1511,10 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         variable_names_transformed : list[str] of length n_features
             Names of each variable in the transformed dataset,
             `X_transformed`.
+        X_units_transformed : list[str] of length n_features
+            Units of each variable in the transformed dataset.
+        y_units_transformed : str | list[str] of length n_out
+            Units of each variable in the transformed dataset.
         """
         # Feature selection transformation
         if self.select_k_features:
@@ -1453,10 +1529,15 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
             # Reduce variable_names to selection
             variable_names = [variable_names[i] for i in self.selection_mask_]
 
+            if X_units is not None:
+                X_units = [X_units[i] for i in self.selection_mask_]
+                self.X_units_ = copy.deepcopy(X_units)
+
             # Re-perform data validation and feature name updating
             X, y = self._validate_data(X=X, y=y, reset=True, multi_output=True)
             # Update feature names with selected variable names
             self.feature_names_in_ = _check_feature_names_in(self, variable_names)
+            self.pretty_feature_names_in_ = None
             print(f"Using features {self.feature_names_in_}")
 
         # Denoising transformation
@@ -1476,7 +1557,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         else:
             X, y = _denoise(X, y, Xresampled=Xresampled, random_state=random_state)
 
-        return X, y, variable_names
+        return X, y, variable_names, X_units, y_units
 
     def _run(self, X, y, mutated_params, weights, seed):
         """
@@ -1629,6 +1710,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
             tournament_selection_n=self.tournament_selection_n,
             # These have the same name:
             parsimony=self.parsimony,
+            dimensional_constraint_penalty=self.dimensional_constraint_penalty,
             alpha=self.alpha,
             maxdepth=maxdepth,
             fast_cycle=self.fast_cycle,
@@ -1648,6 +1730,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
             fraction_replaced=self.fraction_replaced,
             topn=self.topn,
             verbosity=self.verbosity,
+            print_precision=self.print_precision,
             optimizer_algorithm=self.optimizer_algorithm,
             optimizer_nrestarts=self.optimizer_nrestarts,
             optimizer_probability=self.optimize_probability,
@@ -1699,6 +1782,12 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
             None if parallelism in ["serial", "multithreading"] else int(self.procs)
         )
 
+        y_variable_names = None
+        if len(y.shape) > 1:
+            # We set these manually so that they respect Python's 0 indexing
+            # (by default Julia will use y1, y2...)
+            y_variable_names = [f"y{_subscriptify(i)}" for i in range(y.shape[1])]
+
         # Call to Julia backend.
         # See https://github.com/MilesCranmer/SymbolicRegression.jl/blob/master/src/SymbolicRegression.jl
         self.raw_julia_state_ = SymbolicRegression.equation_search(
@@ -1706,7 +1795,15 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
             Main.y,
             weights=Main.weights,
             niterations=int(self.niterations),
-            variable_names=self.feature_names_in_.tolist(),
+            variable_names=(
+                self.pretty_feature_names_in_.tolist()
+                if hasattr(self, "pretty_feature_names_in_")
+                and self.pretty_feature_names_in_ is not None
+                else self.feature_names_in_.tolist()
+            ),
+            y_variable_names=y_variable_names,
+            X_units=self.X_units_,
+            y_units=self.y_units_,
             options=options,
             numprocs=cprocs,
             parallelism=parallelism,
@@ -1732,6 +1829,8 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         Xresampled=None,
         weights=None,
         variable_names=None,
+        X_units=None,
+        y_units=None,
     ):
         """
         Search for equations to fit the dataset and store them in `self.equations_`.
@@ -1759,6 +1858,15 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
             instead of `variable_names`. Cannot contain spaces or special
             characters. Avoid variable names which are also
             function names in `sympy`, such as "N".
+        X_units : list[str]
+            A list of units for each variable in `X`. Each unit should be
+            a string representing a Julia expression. See DynamicQuantities.jl
+            https://symbolicml.org/DynamicQuantities.jl/dev/units/ for more
+            information.
+        y_units : str | list[str]
+            Similar to `X_units`, but as a unit for the target variable, `y`.
+            If `y` is a matrix, a list of units should be passed. If `X_units`
+            is given but `y_units` is not, then `y_units` will be arbitrary.
 
         Returns
         -------
@@ -1780,6 +1888,8 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         self.nout_ = 1
         self.selection_mask_ = None
         self.raw_julia_state_ = None
+        self.X_units_ = None
+        self.y_units_ = None
 
         random_state = check_random_state(self.random_state)  # For np random
         seed = random_state.get_state()[1][0]  # For julia random
@@ -1788,8 +1898,16 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
 
         mutated_params = self._validate_and_set_init_params()
 
-        X, y, Xresampled, weights, variable_names = self._validate_and_set_fit_params(
-            X, y, Xresampled, weights, variable_names
+        (
+            X,
+            y,
+            Xresampled,
+            weights,
+            variable_names,
+            X_units,
+            y_units,
+        ) = self._validate_and_set_fit_params(
+            X, y, Xresampled, weights, variable_names, X_units, y_units
         )
 
         if X.shape[0] > 10000 and not self.batching:
@@ -1804,8 +1922,8 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
             )
 
         # Pre transformations (feature selection and denoising)
-        X, y, variable_names = self._pre_transform_training_data(
-            X, y, Xresampled, variable_names, random_state
+        X, y, variable_names, X_units, y_units = self._pre_transform_training_data(
+            X, y, Xresampled, variable_names, X_units, y_units, random_state
         )
 
         # Warn about large feature counts (still warn if feature count is large
@@ -1834,6 +1952,8 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
             variable_names,
             weights,
             y,
+            X_units,
+            y_units,
         )
 
         # Initially, just save model parameters, so that
@@ -2072,17 +2192,8 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
                     with open(cur_filename, "r") as f:
                         buf = f.read()
                     buf = _preprocess_julia_floats(buf)
-                    df = pd.read_csv(StringIO(buf))
-
-                    # Rename Complexity column to complexity:
-                    df.rename(
-                        columns={
-                            "Complexity": "complexity",
-                            "Loss": "loss",
-                            "Equation": "equation",
-                        },
-                        inplace=True,
-                    )
+
+                    df = self._postprocess_dataframe(pd.read_csv(StringIO(buf)))
 
                     all_outputs.append(df)
             else:
@@ -2092,15 +2203,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
                 with open(filename, "r") as f:
                     buf = f.read()
                 buf = _preprocess_julia_floats(buf)
-                all_outputs = [pd.read_csv(StringIO(buf))]
-                all_outputs[-1].rename(
-                    columns={
-                        "Complexity": "complexity",
-                        "Loss": "loss",
-                        "Equation": "equation",
-                    },
-                    inplace=True,
-                )
+                all_outputs = [self._postprocess_dataframe(pd.read_csv(StringIO(buf)))]
 
         except FileNotFoundError:
             raise RuntimeError(
@@ -2109,6 +2212,35 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
             )
         return all_outputs
 
+    def _postprocess_dataframe(self, df: pd.DataFrame) -> pd.DataFrame:
+        df = df.rename(
+            columns={
+                "Complexity": "complexity",
+                "Loss": "loss",
+                "Equation": "equation",
+            },
+        )
+        # Regexp replace x₁₂₃ with x123 in `equation`:
+        if (
+            hasattr(self, "pretty_feature_names_in_")
+            and self.pretty_feature_names_in_ is not None
+        ):
+            for pname, name in zip(
+                self.pretty_feature_names_in_, self.feature_names_in_
+            ):
+                df["equation"] = df["equation"].apply(
+                    lambda s: re.sub(
+                        r"\b" + f"({pname})" + r"\b",
+                        name,
+                        s,
+                    )
+                    if isinstance(s, str)
+                    else s
+                )
+
+        return df
+
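The word-boundary regex above maps pretty subscript names back to plain ones without corrupting longer names. A minimal standalone sketch of the same substitution:

```python
import re

equation = "cos(x₁) + x₀"
for pname, name in [("x₀", "x0"), ("x₁", "x1")]:
    # \b prevents x₁ from matching inside a longer name like x₁₂,
    # since subscript digits count as word characters in Python's re.
    equation = re.sub(r"\b" + f"({pname})" + r"\b", name, equation)
print(equation)  # cos(x1) + x0
```
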
     def get_hof(self):
         """Get the equations from a hall of fame file.
 
@@ -2409,3 +2541,11 @@ def _preprocess_julia_floats(s: str) -> str:
     s = _apply_regexp_im_sci(s)
     s = _apply_regexp_sci(s)
     return s
+
+
+def _subscriptify(i: int) -> str:
+    """Converts integer to subscript text form.
+
+    For example, 123 -> "₁₂₃".
+    """
+    return "".join([chr(0x2080 + int(c)) for c in str(i)])
pysr/test/test.py CHANGED
@@ -19,6 +19,7 @@ from ..sr import (
     _handle_feature_selection,
     _csv_filename_to_pkl_filename,
     idx_model_selection,
+    _check_assertions,
 )
 from ..export_latex import to_latex
 
@@ -711,6 +712,26 @@ class TestMiscellaneous(unittest.TestCase):
         # If any checks failed don't let the test pass.
         self.assertEqual(len(exception_messages), 0)
 
+    def test_param_groupings(self):
+        """Test that param_groupings are complete"""
+        param_groupings_file = Path(__file__).parent.parent / "param_groupings.yml"
+        # Read the file, discarding lines ending in ":",
+        # and removing leading "\s*-\s*":
+        params = []
+        with open(param_groupings_file, "r") as f:
+            for line in f.readlines():
+                if line.strip().endswith(":"):
+                    continue
+                if line.strip().startswith("-"):
+                    params.append(line.strip()[1:].strip())
+
+        regressor_params = [
+            p for p in DEFAULT_PARAMS.keys() if p not in ["self", "kwargs"]
+        ]
+
+        # Check the sets are equal:
+        self.assertSetEqual(set(params), set(regressor_params))
+
 
 TRUE_PREAMBLE = "\n".join(
     [
@@ -906,6 +927,151 @@ class TestLaTeXTable(unittest.TestCase):
         self.assertEqual(latex_table_str, true_latex_table_str)
 
 
+class TestDimensionalConstraints(unittest.TestCase):
+    def setUp(self):
+        self.default_test_kwargs = dict(
+            progress=False,
+            model_selection="accuracy",
+            niterations=DEFAULT_NITERATIONS * 2,
+            populations=DEFAULT_POPULATIONS * 2,
+            temp_equation_file=True,
+        )
+        self.rstate = np.random.RandomState(0)
+        self.X = self.rstate.randn(100, 5)
+
+    def test_dimensional_constraints(self):
+        y = np.cos(self.X[:, [0, 1]])
+        model = PySRRegressor(
+            binary_operators=[
+                "my_add(x, y) = x + y",
+                "my_sub(x, y) = x - y",
+                "my_mul(x, y) = x * y",
+            ],
+            unary_operators=["my_cos(x) = cos(x)"],
+            **self.default_test_kwargs,
+            early_stop_condition=1e-8,
+            select_k_features=3,
+            extra_sympy_mappings={
+                "my_cos": sympy.cos,
+                "my_add": lambda x, y: x + y,
+                "my_sub": lambda x, y: x - y,
+                "my_mul": lambda x, y: x * y,
+            },
+        )
+        model.fit(self.X, y, X_units=["m", "m", "m", "m", "m"], y_units=["m", "m"])
+
+        # The best expression should have complexity larger than just 2:
+        for i in range(2):
+            self.assertGreater(model.get_best()[i]["complexity"], 2)
+            self.assertLess(model.get_best()[i]["loss"], 1e-6)
+            self.assertGreater(
+                model.equations_[i].query("complexity <= 2").loss.min(), 1e-6
+            )
+
+    def test_unit_checks(self):
+        """This just checks the number of units passed"""
+        use_custom_variable_names = False
+        variable_names = None
+        weights = None
+        args = (use_custom_variable_names, variable_names, weights)
+        valid_units = [
+            (np.ones((10, 2)), np.ones(10), ["m/s", "s"], "m"),
+            (np.ones((10, 1)), np.ones(10), ["m/s"], None),
+            (np.ones((10, 1)), np.ones(10), None, "m/s"),
+            (np.ones((10, 1)), np.ones(10), None, ["m/s"]),
+            (np.ones((10, 1)), np.ones((10, 1)), None, ["m/s"]),
+            (np.ones((10, 1)), np.ones((10, 2)), None, ["m/s", ""]),
+        ]
+        for X, y, X_units, y_units in valid_units:
+            _check_assertions(
+                X,
+                *args,
+                y,
+                X_units,
+                y_units,
+            )
+        invalid_units = [
+            (np.ones((10, 2)), np.ones(10), ["m/s", "s", "s^2"], None),
+            (np.ones((10, 2)), np.ones(10), ["m/s", "s", "s^2"], "m"),
+            (np.ones((10, 2)), np.ones((10, 2)), ["m/s", "s"], ["m"]),
+            (np.ones((10, 1)), np.ones((10, 1)), "m/s", ["m"]),
+        ]
+        for X, y, X_units, y_units in invalid_units:
+            with self.assertRaises(ValueError):
+                _check_assertions(
+                    X,
+                    *args,
+                    y,
+                    X_units,
+                    y_units,
+                )
+
+    def test_unit_propagation(self):
+        """Check that units are propagated correctly.
+
+        This also tests that variables have the correct names.
+        """
+        X = np.ones((100, 3))
+        y = np.ones((100, 1))
+        temp_dir = Path(tempfile.mkdtemp())
+        equation_file = str(temp_dir / "equation_file.csv")
+        model = PySRRegressor(
+            binary_operators=["+", "*"],
+            early_stop_condition="(l, c) -> l < 1e-6 && c == 3",
+            progress=False,
+            model_selection="accuracy",
+            niterations=DEFAULT_NITERATIONS * 2,
+            populations=DEFAULT_POPULATIONS * 2,
+            complexity_of_constants=10,
+            weight_mutate_constant=0.0,
+            should_optimize_constants=False,
+            multithreading=False,
+            deterministic=True,
+            procs=0,
+            random_state=0,
+            equation_file=equation_file,
+            warm_start=True,
+        )
+        model.fit(
+            X,
+            y,
+            X_units=["m", "s", "A"],
+            y_units=["m*A"],
+        )
+        best = model.get_best()
+        self.assertIn("x0", best["equation"])
+        self.assertNotIn("x1", best["equation"])
+        self.assertIn("x2", best["equation"])
+        self.assertEqual(best["complexity"], 3)
+        self.assertEqual(model.equations_.iloc[0].complexity, 1)
+        self.assertGreater(model.equations_.iloc[0].loss, 1e-6)
+
+        # With pkl file:
+        pkl_file = str(temp_dir / "equation_file.pkl")
+        model2 = PySRRegressor.from_file(pkl_file)
+        best2 = model2.get_best()
+        self.assertIn("x0", best2["equation"])
+
+        # From csv file alone (we need to delete the pkl file first):
+        os.remove(pkl_file)
+        model3 = PySRRegressor.from_file(
+            equation_file, binary_operators=["+", "*"], n_features_in=X.shape[1]
+        )
+        best3 = model3.get_best()
+        self.assertIn("x0", best3["equation"])
+
+        # Try a warm start, but with no units provided (should
+        # be a different dataset, and thus a different result):
+        model.early_stop_condition = "(l, c) -> l < 1e-6 && c == 1"
+        model.fit(X, y)
+        self.assertEqual(model.equations_.iloc[0].complexity, 1)
+        self.assertLess(model.equations_.iloc[0].loss, 1e-6)
+
+
+# TODO: Determine desired behavior if second .fit() call does not have units
+
+
 def runtests():
     """Run all tests in test.py."""
     suite = unittest.TestSuite()
@@ -916,6 +1082,7 @@ def runtests():
         TestFeatureSelection,
         TestMiscellaneous,
         TestLaTeXTable,
+        TestDimensionalConstraints,
     ]
     for test_case in test_cases:
         tests = loader.loadTestsFromTestCase(test_case)
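To exercise the new class alongside the rest of the suite, the module's own `runtests()` entry point can be used. A sketch; the import path assumes `runtests` is re-exported from `pysr.test`:

```python
# Hypothetical invocation; adjust the import if runtests is not re-exported.
from pysr.test import runtests

runtests()  # now includes TestDimensionalConstraints
```
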
pysr/version.py CHANGED
@@ -1,2 +1,2 @@
-__version__ = "0.14.4"
-__symbolic_regression_jl_version__ = "0.20.0"
+__version__ = "0.15.0"
+__symbolic_regression_jl_version__ = "0.21.3"
requirements.txt CHANGED
@@ -1,5 +1,5 @@
 sympy
-pandas
+pandas>=0.21.0
 numpy
 scikit_learn>=1.0.0
 julia>=0.6.0