Spaces:

MilesCranmer
/

PySR

Running

App Files Community

Tom Jelen

MilesCranmer committed on Mar 22

Commit

fb5f0a1

•

1 Parent(s): efffd9b

Fix TypeError when a variable name matches a builtin python function (#558)

Browse files

* Fix thrown TypeError when a variable name matches a built-in Python function

Example:

A dataset with a column named 'exec' failed with:

ValueError: Error from parse_expr with transformed code: "(Float ('86.76248' )-exec )"
... snip ...
TypeError: unsupported operand type(s) for -: 'Float' and 'builtin_function_or_method'

* Ensure backwards compatibility for `pysr2sympy` and use same method

* Fix potential issue with list ordering

* Combine builtin variable names test with noisy data test

* Fix builtin variable names test

---------

Co-authored-by: MilesCranmer <miles.cranmer@gmail.com>

Files changed (3) hide show

pysr/export_sympy.py +14 -2
pysr/sr.py +1 -0
pysr/test/test.py +5 -2

pysr/export_sympy.py CHANGED Viewed

@@ -57,6 +57,12 @@ sympy_mappings = {
 }
 def create_sympy_symbols(
     feature_names_in: List[str],
 ) -> List[sympy.Symbol]:
@@ -64,10 +70,16 @@ def create_sympy_symbols(
 def pysr2sympy(
-    equation: str, *, extra_sympy_mappings: Optional[Dict[str, Callable]] = None
 ):
     local_sympy_mappings = {
-        **(extra_sympy_mappings if extra_sympy_mappings else {}),
         **sympy_mappings,
     }

 }
+def create_sympy_symbols_map(
+    feature_names_in: List[str],
+) -> Dict[str, sympy.Symbol]:
+    return {variable: sympy.Symbol(variable) for variable in feature_names_in}
 def create_sympy_symbols(
     feature_names_in: List[str],
 ) -> List[sympy.Symbol]:
 def pysr2sympy(
+    equation: str,
+    *,
+    feature_names_in: Optional[List[str]] = None,
+    extra_sympy_mappings: Optional[Dict[str, Callable]] = None,
 ):
+    if feature_names_in is None:
+        feature_names_in = []
     local_sympy_mappings = {
+        **create_sympy_symbols_map(feature_names_in),
+        **(extra_sympy_mappings if extra_sympy_mappings is not None else {}),
         **sympy_mappings,
     }

pysr/sr.py CHANGED Viewed

@@ -2226,6 +2226,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
             for _, eqn_row in output.iterrows():
                 eqn = pysr2sympy(
                     eqn_row["equation"],
                     extra_sympy_mappings=self.extra_sympy_mappings,
                 )
                 sympy_format.append(eqn)

             for _, eqn_row in output.iterrows():
                 eqn = pysr2sympy(
                     eqn_row["equation"],
+                    feature_names_in=self.feature_names_in_,
                     extra_sympy_mappings=self.extra_sympy_mappings,
                 )
                 sympy_format.append(eqn)

pysr/test/test.py CHANGED Viewed

@@ -272,7 +272,7 @@ class TestPipeline(unittest.TestCase):
         regressor = PySRRegressor(warm_start=True, max_evals=10)
         regressor.fit(self.X, y)
-    def test_noisy(self):
         y = self.X[:, [0, 1]] ** 2 + self.rstate.randn(self.X.shape[0], 1) * 0.05
         model = PySRRegressor(
             # Test that passing a single operator works:
@@ -289,9 +289,12 @@ class TestPipeline(unittest.TestCase):
         model.set_params(model_selection="best")
         # Also try without a temp equation file:
         model.set_params(temp_equation_file=False)
-        model.fit(self.X, y)
         self.assertLessEqual(model.get_best()[1]["loss"], 1e-2)
         self.assertLessEqual(model.get_best()[1]["loss"], 1e-2)
     def test_pandas_resample_with_nested_constraints(self):
         X = pd.DataFrame(

         regressor = PySRRegressor(warm_start=True, max_evals=10)
         regressor.fit(self.X, y)
+    def test_noisy_builtin_variable_names(self):
         y = self.X[:, [0, 1]] ** 2 + self.rstate.randn(self.X.shape[0], 1) * 0.05
         model = PySRRegressor(
             # Test that passing a single operator works:
         model.set_params(model_selection="best")
         # Also try without a temp equation file:
         model.set_params(temp_equation_file=False)
+        # We also test builtin variable names
+        model.fit(self.X, y, variable_names=["exec", "hash", "x3", "x4", "x5"])
         self.assertLessEqual(model.get_best()[1]["loss"], 1e-2)
         self.assertLessEqual(model.get_best()[1]["loss"], 1e-2)
+        self.assertIn("exec", model.latex()[0])
+        self.assertIn("hash", model.latex()[1])
     def test_pandas_resample_with_nested_constraints(self):
         X = pd.DataFrame(