Spaces:
Running
Running
tttc3
commited on
Commit
·
4819728
1
Parent(s):
9750ff9
Added validation for weights
Browse files- pysr/sr.py +17 -7
pysr/sr.py
CHANGED
@@ -13,7 +13,11 @@ from datetime import datetime
|
|
13 |
import warnings
|
14 |
from multiprocessing import cpu_count
|
15 |
from sklearn.base import BaseEstimator, RegressorMixin, MultiOutputMixin
|
16 |
-
from sklearn.utils.validation import
|
|
|
|
|
|
|
|
|
17 |
|
18 |
from .julia_helpers import (
|
19 |
init_julia,
|
@@ -980,13 +984,13 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
980 |
parameter_value, str
|
981 |
):
|
982 |
parameter_value = [parameter_value]
|
983 |
-
elif parameter
|
984 |
warnings.warn(
|
985 |
"Given :param`batch_size` must be greater than or equal to one. "
|
986 |
":param`batch_size` has been increased to equal one."
|
987 |
)
|
988 |
parameter_value = 1
|
989 |
-
elif parameter
|
990 |
warnings.warn(
|
991 |
"Note: it looks like you are running in Jupyter. The progress bar will be turned off."
|
992 |
)
|
@@ -1000,7 +1004,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
1000 |
)
|
1001 |
return packed_modified_params
|
1002 |
|
1003 |
-
def _validate_fit_params(self, X, y, Xresampled, variable_names):
|
1004 |
"""
|
1005 |
Validates the parameters passed to the :term`fit` method.
|
1006 |
|
@@ -1018,6 +1022,10 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
1018 |
(n_resampled, n_features), default=None
|
1019 |
Resampled training data used for denoising.
|
1020 |
|
|
|
|
|
|
|
|
|
1021 |
variable_names : list[str] of length n_features
|
1022 |
Names of each variable in the training dataset, `X`.
|
1023 |
|
@@ -1064,6 +1072,8 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
1064 |
# This method sets the n_features_in_ attribute
|
1065 |
if Xresampled is not None:
|
1066 |
Xresampled = check_array(Xresampled)
|
|
|
|
|
1067 |
X, y = self._validate_data(X=X, y=y, reset=True, multi_output=True)
|
1068 |
self.feature_names_in_ = _check_feature_names_in(self, variable_names)
|
1069 |
variable_names = self.feature_names_in_
|
@@ -1076,7 +1086,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
1076 |
else:
|
1077 |
raise NotImplementedError("y shape not supported!")
|
1078 |
|
1079 |
-
return X, y, Xresampled, variable_names
|
1080 |
|
1081 |
def _pre_transform_training_data(
|
1082 |
self, X, y, Xresampled, variable_names, random_state
|
@@ -1452,8 +1462,8 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
1452 |
mutated_params = self._validate_init_params()
|
1453 |
|
1454 |
# Parameter input validation (for parameters defined in __init__)
|
1455 |
-
X, y, Xresampled, variable_names = self._validate_fit_params(
|
1456 |
-
X, y, Xresampled, variable_names
|
1457 |
)
|
1458 |
|
1459 |
if X.shape[0] > 10000 and not self.batching:
|
|
|
13 |
import warnings
|
14 |
from multiprocessing import cpu_count
|
15 |
from sklearn.base import BaseEstimator, RegressorMixin, MultiOutputMixin
|
16 |
+
from sklearn.utils.validation import (
|
17 |
+
_check_feature_names_in,
|
18 |
+
_check_sample_weight,
|
19 |
+
check_is_fitted,
|
20 |
+
)
|
21 |
|
22 |
from .julia_helpers import (
|
23 |
init_julia,
|
|
|
984 |
parameter_value, str
|
985 |
):
|
986 |
parameter_value = [parameter_value]
|
987 |
+
elif parameter == "batch_size" and parameter_value < 1:
|
988 |
warnings.warn(
|
989 |
"Given :param`batch_size` must be greater than or equal to one. "
|
990 |
":param`batch_size` has been increased to equal one."
|
991 |
)
|
992 |
parameter_value = 1
|
993 |
+
elif parameter == "progress" and not buffer_available:
|
994 |
warnings.warn(
|
995 |
"Note: it looks like you are running in Jupyter. The progress bar will be turned off."
|
996 |
)
|
|
|
1004 |
)
|
1005 |
return packed_modified_params
|
1006 |
|
1007 |
+
def _validate_fit_params(self, X, y, Xresampled, weights, variable_names):
|
1008 |
"""
|
1009 |
Validates the parameters passed to the :term`fit` method.
|
1010 |
|
|
|
1022 |
(n_resampled, n_features), default=None
|
1023 |
Resampled training data used for denoising.
|
1024 |
|
1025 |
+
weights : {ndarray | pandas.DataFrame} of the same shape as y
|
1026 |
+
Each element is how to weight the mean-square-error loss
|
1027 |
+
for that particular element of y.
|
1028 |
+
|
1029 |
variable_names : list[str] of length n_features
|
1030 |
Names of each variable in the training dataset, `X`.
|
1031 |
|
|
|
1072 |
# This method sets the n_features_in_ attribute
|
1073 |
if Xresampled is not None:
|
1074 |
Xresampled = check_array(Xresampled)
|
1075 |
+
if weights is not None:
|
1076 |
+
weights = _check_sample_weight(weights, y)
|
1077 |
X, y = self._validate_data(X=X, y=y, reset=True, multi_output=True)
|
1078 |
self.feature_names_in_ = _check_feature_names_in(self, variable_names)
|
1079 |
variable_names = self.feature_names_in_
|
|
|
1086 |
else:
|
1087 |
raise NotImplementedError("y shape not supported!")
|
1088 |
|
1089 |
+
return X, y, Xresampled, weights, variable_names
|
1090 |
|
1091 |
def _pre_transform_training_data(
|
1092 |
self, X, y, Xresampled, variable_names, random_state
|
|
|
1462 |
mutated_params = self._validate_init_params()
|
1463 |
|
1464 |
# Parameter input validation (for parameters defined in __init__)
|
1465 |
+
X, y, Xresampled, weights, variable_names = self._validate_fit_params(
|
1466 |
+
X, y, Xresampled, weights, variable_names
|
1467 |
)
|
1468 |
|
1469 |
if X.shape[0] > 10000 and not self.batching:
|