MilesCranmer committed on
Commit
e5a9067
•
1 Parent(s): 117b2c3

Create pretty variable names for print outs

Browse files
Files changed (1) hide show
  1. pysr/sr.py +37 -31
pysr/sr.py CHANGED
@@ -633,8 +633,8 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
633
  feature_names_in_ : ndarray of shape (`n_features_in_`,)
634
  Names of features seen during :term:`fit`. Defined only when `X`
635
  has feature names that are all strings.
636
- is_default_feature_names_ : bool
637
- Whether `feature_names_in_` was not set by the user.
638
  nout_ : int
639
  Number of output dimensions.
640
  selection_mask_ : list[int] of length `select_k_features`
@@ -997,12 +997,14 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
997
  model.n_features_in_ = n_features_in
998
 
999
  if feature_names_in is None:
1000
- model.feature_names_in_ = [f"x{_subscriptify(i)}" for i in range(n_features_in)]
1001
- model.is_default_feature_names_ = True
 
 
1002
  else:
1003
  assert len(feature_names_in) == n_features_in
1004
  model.feature_names_in_ = feature_names_in
1005
- model.is_default_feature_names_ = False
1006
 
1007
  if selection_mask is None:
1008
  model.selection_mask_ = np.ones(n_features_in, dtype=bool)
@@ -1388,17 +1390,17 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
1388
  weights = check_array(weights, ensure_2d=False)
1389
  check_consistent_length(weights, y)
1390
  X, y = self._validate_data(X=X, y=y, reset=True, multi_output=True)
1391
- feature_names_in_ = _check_feature_names_in(self, variable_names, generate_names=False)
1392
-
1393
- if feature_names_in_ is None:
1394
- self.feature_names_in_ = [f"x{_subscriptify(i)}" for i in range(X.shape[1])]
1395
- # We record that we have generated the feature names
1396
- # so that we can undo the subscriptification (for
1397
- # SymPy compatibility).
1398
- self.is_default_feature_names_ = True
 
1399
  else:
1400
- self.feature_names_in = feature_names_in_
1401
- self.is_default_feature_names_ = False
1402
 
1403
  variable_names = self.feature_names_in_
1404
 
@@ -1721,7 +1723,11 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
1721
  Main.y,
1722
  weights=Main.weights,
1723
  niterations=int(self.niterations),
1724
- variable_names=self.feature_names_in_,
 
 
 
 
1725
  options=options,
1726
  numprocs=cprocs,
1727
  parallelism=parallelism,
@@ -2098,9 +2104,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
2098
  with open(filename, "r") as f:
2099
  buf = f.read()
2100
  buf = _preprocess_julia_floats(buf)
2101
- all_outputs = [
2102
- self._postprocess_dataframe(pd.read_csv(StringIO(buf)))
2103
- ]
2104
 
2105
  except FileNotFoundError:
2106
  raise RuntimeError(
@@ -2118,14 +2122,23 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
2118
  },
2119
  )
2120
  # Regexp replace x₁₂₃ to x123 in `equation`:
2121
- if self.is_default_feature_names_:
2122
- df["equation"] = df["equation"].apply(
2123
- lambda s: re.sub(r"x([₀₁₂₃₄₅₆₇₈₉]+)", lambda m: f"x{_undo_subscriptify(m.group(1))}", s)
2124
- )
 
 
 
 
 
 
 
 
 
 
2125
 
2126
  return df
2127
 
2128
-
2129
  def get_hof(self):
2130
  """Get the equations from a hall of fame file.
2131
 
@@ -2434,10 +2447,3 @@ def _subscriptify(i: int) -> str:
2434
  For example, 123 -> "₁₂₃".
2435
  """
2436
  return "".join([chr(0x2080 + int(c)) for c in str(i)])
2437
-
2438
- def _undo_subscriptify(s: str) -> int:
2439
- """Converts subscript text form to integer.
2440
-
2441
- For example, "₁₂₃" -> 123.
2442
- """
2443
- return int("".join([str(ord(c) - 0x2080) for c in s]))
 
633
  feature_names_in_ : ndarray of shape (`n_features_in_`,)
634
  Names of features seen during :term:`fit`. Defined only when `X`
635
  has feature names that are all strings.
636
+ pretty_feature_names_in_ : ndarray of shape (`n_features_in_`,)
637
+ Pretty names of features, used only during printing.
638
  nout_ : int
639
  Number of output dimensions.
640
  selection_mask_ : list[int] of length `select_k_features`
 
997
  model.n_features_in_ = n_features_in
998
 
999
  if feature_names_in is None:
1000
+ model.feature_names_in_ = [f"x{i}" for i in range(n_features_in)]
1001
+ model.pretty_feature_names_in_ = [
1002
+ f"x{_subscriptify(i)}" for i in range(n_features_in)
1003
+ ]
1004
  else:
1005
  assert len(feature_names_in) == n_features_in
1006
  model.feature_names_in_ = feature_names_in
1007
+ model.pretty_feature_names_in_ = None
1008
 
1009
  if selection_mask is None:
1010
  model.selection_mask_ = np.ones(n_features_in, dtype=bool)
 
1390
  weights = check_array(weights, ensure_2d=False)
1391
  check_consistent_length(weights, y)
1392
  X, y = self._validate_data(X=X, y=y, reset=True, multi_output=True)
1393
+ self.feature_names_in_ = _check_feature_names_in(
1394
+ self, variable_names, generate_names=False
1395
+ )
1396
+
1397
+ if self.feature_names_in_ is None:
1398
+ self.feature_names_in_ = [f"x{i}" for i in range(X.shape[1])]
1399
+ self.pretty_feature_names_in_ = [
1400
+ f"x{_subscriptify(i)}" for i in range(X.shape[1])
1401
+ ]
1402
  else:
1403
+ self.pretty_feature_names_in_ = None
 
1404
 
1405
  variable_names = self.feature_names_in_
1406
 
 
1723
  Main.y,
1724
  weights=Main.weights,
1725
  niterations=int(self.niterations),
1726
+ variable_names=(
1727
+ self.pretty_feature_names_in_
1728
+ if self.pretty_feature_names_in_ is not None
1729
+ else self.feature_names_in_
1730
+ ),
1731
  options=options,
1732
  numprocs=cprocs,
1733
  parallelism=parallelism,
 
2104
  with open(filename, "r") as f:
2105
  buf = f.read()
2106
  buf = _preprocess_julia_floats(buf)
2107
+ all_outputs = [self._postprocess_dataframe(pd.read_csv(StringIO(buf)))]
 
 
2108
 
2109
  except FileNotFoundError:
2110
  raise RuntimeError(
 
2122
  },
2123
  )
2124
  # Regexp replace x₁₂₃ to x123 in `equation`:
2125
+ if self.pretty_feature_names_in_ is not None:
2126
+ # df["equation"] = df["equation"].apply(_undo_subscriptify_full)
2127
+ for pname, name in zip(
2128
+ self.pretty_feature_names_in_, self.feature_names_in_
2129
+ ):
2130
+ df["equation"] = df["equation"].apply(
2131
+ lambda s: re.sub(
2132
+ r"\b" + f"({pname})" + r"\b",
2133
+ name,
2134
+ s,
2135
+ )
2136
+ if isinstance(s, str)
2137
+ else s
2138
+ )
2139
 
2140
  return df
2141
 
 
2142
  def get_hof(self):
2143
  """Get the equations from a hall of fame file.
2144
 
 
2447
  For example, 123 -> "₁₂₃".
2448
  """
2449
  return "".join([chr(0x2080 + int(c)) for c in str(i)])