Spaces:
Running
Running
MilesCranmer
committed on
fix: selection_mask to be bool array
Browse files
- pysr/feature_selection.py +2 -2
- pysr/sr.py +14 -6
pysr/feature_selection.py
CHANGED
@@ -14,7 +14,7 @@ def run_feature_selection(
|
|
14 |
y: ndarray,
|
15 |
select_k_features: int,
|
16 |
random_state: Optional[np.random.RandomState] = None,
|
17 |
-
) -> NDArray[np.
|
18 |
"""
|
19 |
Find most important features.
|
20 |
|
@@ -32,7 +32,7 @@ def run_feature_selection(
|
|
32 |
selector = SelectFromModel(
|
33 |
clf, threshold=-np.inf, max_features=select_k_features, prefit=True
|
34 |
)
|
35 |
-
return cast(NDArray[np.
|
36 |
|
37 |
|
38 |
# Function has not been removed only due to usage in module tests
|
|
|
14 |
y: ndarray,
|
15 |
select_k_features: int,
|
16 |
random_state: Optional[np.random.RandomState] = None,
|
17 |
+
) -> NDArray[np.bool_]:
|
18 |
"""
|
19 |
Find most important features.
|
20 |
|
|
|
32 |
selector = SelectFromModel(
|
33 |
clf, threshold=-np.inf, max_features=select_k_features, prefit=True
|
34 |
)
|
35 |
+
return cast(NDArray[np.bool_], selector.get_support(indices=False))
|
36 |
|
37 |
|
38 |
# Function has not been removed only due to usage in module tests
|
pysr/sr.py
CHANGED
@@ -674,7 +674,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
674 |
X_units_: Optional[ArrayLike[str]]
|
675 |
y_units_: Optional[Union[str, ArrayLike[str]]]
|
676 |
nout_: int
|
677 |
-
selection_mask_: Optional[NDArray[np.
|
678 |
tempdir_: Path
|
679 |
equation_file_: Union[str, Path]
|
680 |
julia_state_stream_: Optional[NDArray[np.uint8]]
|
@@ -920,7 +920,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
920 |
unary_operators: Optional[List[str]] = None,
|
921 |
n_features_in: Optional[int] = None,
|
922 |
feature_names_in: Optional[ArrayLike[str]] = None,
|
923 |
-
selection_mask: Optional[NDArray[np.
|
924 |
nout: int = 1,
|
925 |
**pysr_kwargs,
|
926 |
):
|
@@ -944,7 +944,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
944 |
feature_names_in : list[str]
|
945 |
Names of the features passed to the model.
|
946 |
Not needed if loading from a pickle file.
|
947 |
-
selection_mask : NDArray[np.
|
948 |
If using `select_k_features`, you must pass `model.selection_mask_` here.
|
949 |
Not needed if loading from a pickle file.
|
950 |
nout : int
|
@@ -1016,7 +1016,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
1016 |
model.display_feature_names_in_ = feature_names_in
|
1017 |
|
1018 |
if selection_mask is None:
|
1019 |
-
model.selection_mask_ = np.
|
1020 |
else:
|
1021 |
model.selection_mask_ = selection_mask
|
1022 |
|
@@ -1534,11 +1534,19 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
1534 |
|
1535 |
# Reduce variable_names to selection
|
1536 |
variable_names = cast(
|
1537 |
-
ArrayLike[str],
|
|
|
|
|
|
|
|
|
|
|
1538 |
)
|
1539 |
|
1540 |
if X_units is not None:
|
1541 |
-
X_units = cast(
|
|
|
|
|
|
|
1542 |
self.X_units_ = copy.deepcopy(X_units)
|
1543 |
|
1544 |
# Re-perform data validation and feature name updating
|
|
|
674 |
X_units_: Optional[ArrayLike[str]]
|
675 |
y_units_: Optional[Union[str, ArrayLike[str]]]
|
676 |
nout_: int
|
677 |
+
selection_mask_: Optional[NDArray[np.bool_]]
|
678 |
tempdir_: Path
|
679 |
equation_file_: Union[str, Path]
|
680 |
julia_state_stream_: Optional[NDArray[np.uint8]]
|
|
|
920 |
unary_operators: Optional[List[str]] = None,
|
921 |
n_features_in: Optional[int] = None,
|
922 |
feature_names_in: Optional[ArrayLike[str]] = None,
|
923 |
+
selection_mask: Optional[NDArray[np.bool_]] = None,
|
924 |
nout: int = 1,
|
925 |
**pysr_kwargs,
|
926 |
):
|
|
|
944 |
feature_names_in : list[str]
|
945 |
Names of the features passed to the model.
|
946 |
Not needed if loading from a pickle file.
|
947 |
+
selection_mask : NDArray[np.bool_]
|
948 |
If using `select_k_features`, you must pass `model.selection_mask_` here.
|
949 |
Not needed if loading from a pickle file.
|
950 |
nout : int
|
|
|
1016 |
model.display_feature_names_in_ = feature_names_in
|
1017 |
|
1018 |
if selection_mask is None:
|
1019 |
+
model.selection_mask_ = np.ones(n_features_in, dtype=np.bool_)
|
1020 |
else:
|
1021 |
model.selection_mask_ = selection_mask
|
1022 |
|
|
|
1534 |
|
1535 |
# Reduce variable_names to selection
|
1536 |
variable_names = cast(
|
1537 |
+
ArrayLike[str],
|
1538 |
+
[
|
1539 |
+
variable_names[i]
|
1540 |
+
for i in range(len(variable_names))
|
1541 |
+
if selection_mask[i]
|
1542 |
+
],
|
1543 |
)
|
1544 |
|
1545 |
if X_units is not None:
|
1546 |
+
X_units = cast(
|
1547 |
+
ArrayLike[str],
|
1548 |
+
[X_units[i] for i in range(len(X_units)) if selection_mask[i]],
|
1549 |
+
)
|
1550 |
self.X_units_ = copy.deepcopy(X_units)
|
1551 |
|
1552 |
# Re-perform data validation and feature name updating
|