MilesCranmer committed on
Commit
9854909
·
unverified ·
1 Parent(s): cd925dd

fix: selection_mask to be bool array

Browse files
Files changed (2) hide show
  1. pysr/feature_selection.py +2 -2
  2. pysr/sr.py +14 -6
pysr/feature_selection.py CHANGED
@@ -14,7 +14,7 @@ def run_feature_selection(
14
  y: ndarray,
15
  select_k_features: int,
16
  random_state: Optional[np.random.RandomState] = None,
17
- ) -> NDArray[np.intp]:
18
  """
19
  Find most important features.
20
 
@@ -32,7 +32,7 @@ def run_feature_selection(
32
  selector = SelectFromModel(
33
  clf, threshold=-np.inf, max_features=select_k_features, prefit=True
34
  )
35
- return cast(NDArray[np.intp], selector.get_support(indices=True))
36
 
37
 
38
  # Function has not been removed only due to usage in module tests
 
14
  y: ndarray,
15
  select_k_features: int,
16
  random_state: Optional[np.random.RandomState] = None,
17
+ ) -> NDArray[np.bool_]:
18
  """
19
  Find most important features.
20
 
 
32
  selector = SelectFromModel(
33
  clf, threshold=-np.inf, max_features=select_k_features, prefit=True
34
  )
35
+ return cast(NDArray[np.bool_], selector.get_support(indices=False))
36
 
37
 
38
  # Function has not been removed only due to usage in module tests
pysr/sr.py CHANGED
@@ -674,7 +674,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
674
  X_units_: Optional[ArrayLike[str]]
675
  y_units_: Optional[Union[str, ArrayLike[str]]]
676
  nout_: int
677
- selection_mask_: Optional[NDArray[np.intp]]
678
  tempdir_: Path
679
  equation_file_: Union[str, Path]
680
  julia_state_stream_: Optional[NDArray[np.uint8]]
@@ -920,7 +920,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
920
  unary_operators: Optional[List[str]] = None,
921
  n_features_in: Optional[int] = None,
922
  feature_names_in: Optional[ArrayLike[str]] = None,
923
- selection_mask: Optional[NDArray[np.intp]] = None,
924
  nout: int = 1,
925
  **pysr_kwargs,
926
  ):
@@ -944,7 +944,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
944
  feature_names_in : list[str]
945
  Names of the features passed to the model.
946
  Not needed if loading from a pickle file.
947
- selection_mask : NDArray[np.intp]
948
  If using `select_k_features`, you must pass `model.selection_mask_` here.
949
  Not needed if loading from a pickle file.
950
  nout : int
@@ -1016,7 +1016,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
1016
  model.display_feature_names_in_ = feature_names_in
1017
 
1018
  if selection_mask is None:
1019
- model.selection_mask_ = np.arange(n_features_in, dtype=np.intp)
1020
  else:
1021
  model.selection_mask_ = selection_mask
1022
 
@@ -1534,11 +1534,19 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
1534
 
1535
  # Reduce variable_names to selection
1536
  variable_names = cast(
1537
- ArrayLike[str], [variable_names[i] for i in selection_mask]
 
 
 
 
 
1538
  )
1539
 
1540
  if X_units is not None:
1541
- X_units = cast(ArrayLike[str], [X_units[i] for i in selection_mask])
 
 
 
1542
  self.X_units_ = copy.deepcopy(X_units)
1543
 
1544
  # Re-perform data validation and feature name updating
 
674
  X_units_: Optional[ArrayLike[str]]
675
  y_units_: Optional[Union[str, ArrayLike[str]]]
676
  nout_: int
677
+ selection_mask_: Optional[NDArray[np.bool_]]
678
  tempdir_: Path
679
  equation_file_: Union[str, Path]
680
  julia_state_stream_: Optional[NDArray[np.uint8]]
 
920
  unary_operators: Optional[List[str]] = None,
921
  n_features_in: Optional[int] = None,
922
  feature_names_in: Optional[ArrayLike[str]] = None,
923
+ selection_mask: Optional[NDArray[np.bool_]] = None,
924
  nout: int = 1,
925
  **pysr_kwargs,
926
  ):
 
944
  feature_names_in : list[str]
945
  Names of the features passed to the model.
946
  Not needed if loading from a pickle file.
947
+ selection_mask : NDArray[np.bool_]
948
  If using `select_k_features`, you must pass `model.selection_mask_` here.
949
  Not needed if loading from a pickle file.
950
  nout : int
 
1016
  model.display_feature_names_in_ = feature_names_in
1017
 
1018
  if selection_mask is None:
1019
+ model.selection_mask_ = np.ones(n_features_in, dtype=np.bool_)
1020
  else:
1021
  model.selection_mask_ = selection_mask
1022
 
 
1534
 
1535
  # Reduce variable_names to selection
1536
  variable_names = cast(
1537
+ ArrayLike[str],
1538
+ [
1539
+ variable_names[i]
1540
+ for i in range(len(variable_names))
1541
+ if selection_mask[i]
1542
+ ],
1543
  )
1544
 
1545
  if X_units is not None:
1546
+ X_units = cast(
1547
+ ArrayLike[str],
1548
+ [X_units[i] for i in range(len(X_units)) if selection_mask[i]],
1549
+ )
1550
  self.X_units_ = copy.deepcopy(X_units)
1551
 
1552
  # Re-perform data validation and feature name updating