MilesCranmer committed on
Commit
73042d9
·
unverified ·
1 Parent(s): 88a78a4

Add test data generator to app

Browse files
Files changed (1) hide show
  1. gui/app.py +69 -45
gui/app.py CHANGED
@@ -14,60 +14,76 @@ empty_df = pd.DataFrame(
14
  }
15
  )
16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
  def greet(
19
  file_obj: Optional[tempfile._TemporaryFileWrapper],
20
- col_to_fit: str,
 
 
21
  niterations: int,
22
  maxsize: int,
23
  binary_operators: list,
24
  unary_operators: list,
25
  force_run: bool,
26
  ):
27
- if col_to_fit == "":
28
- return (
29
- empty_df,
30
- "Please enter a column to predict!",
31
- )
32
- if len(binary_operators) == 0 and len(unary_operators) == 0:
33
- return (
34
- empty_df,
35
- "Please select at least one operator!",
36
- )
37
- if file_obj is None:
38
- return (
39
- empty_df,
40
- "Please upload a CSV file!",
41
- )
42
- # Look at some statistics of the file:
43
- df = pd.read_csv(file_obj)
44
- if len(df) == 0:
45
- return (
46
- empty_df,
47
- "The file is empty!",
48
- )
49
- if len(df.columns) == 1:
50
- return (
51
- empty_df,
52
- "The file has only one column!",
53
- )
54
- if col_to_fit not in df.columns:
55
- return (
56
- empty_df,
57
- f"The column to predict, {col_to_fit}, is not in the file!"
58
- f"I found {df.columns}.",
59
- )
60
- if len(df) > 10_000 and not force_run:
61
- return (
62
- empty_df,
63
- "You have uploaded a file with more than 10,000 rows. "
64
- "This will take very long to run. "
65
- "Please upload a subsample of the data, "
66
- "or check the box 'Ignore Warnings'.",
67
- )
68
 
69
- y = np.array(df[col_to_fit])
70
- X = df.drop([col_to_fit], axis=1)
 
 
 
71
 
72
  model = pysr.PySRRegressor(
73
  bumper=True,
@@ -106,7 +122,15 @@ def main():
106
  description="Symbolic Regression with PySR. Watch search progress by following the logs.",
107
  inputs=[
108
  gr.File(label="Upload a CSV File"),
109
- gr.Textbox(label="Column to Predict", placeholder="y"),
 
 
 
 
 
 
 
 
110
  gr.Slider(
111
  minimum=1,
112
  maximum=1000,
 
14
  }
15
  )
16
 
17
test_equations = {
    "Complex Polynomial": "3*x^3 + 2*x^2 - x + sin(x)",
    "Exponential and Logarithmic": "exp(-x) + log(x+1)",
    "Trigonometric Polynomial": "sin(x) + cos(2*x) + tan(x/3)",
    "Mixed Functions": "sqrt(x)*exp(-x) + cos(pi*x)",
    "Rational Function": "(x^2 + 1) / (x - 2)",
}

# Every name an entry of `test_equations` may reference, mapped to its NumPy
# counterpart. Keeping this explicit (instead of textual "sin" -> "np.sin"
# substitution) means a formula using e.g. `sqrt` or `pi` resolves correctly.
_EQUATION_NAMESPACE = {
    "sin": np.sin,
    "cos": np.cos,
    "tan": np.tan,
    "exp": np.exp,
    "log": np.log,
    "sqrt": np.sqrt,
    "pi": np.pi,
}


def generate_data(equation: str, num_points: int, noise_level: float):
    """Sample one of the built-in test equations with Gaussian noise.

    Args:
        equation: Key into ``test_equations`` selecting the formula.
        num_points: Number of evenly spaced samples requested on [-10, 10].
        noise_level: Standard deviation of the zero-mean normal noise added
            to the evaluated formula.

    Returns:
        A ``(X, y)`` tuple: ``X`` is a DataFrame with a single column ``"x"``
        and ``y`` is the noisy target array. Sample points where the formula
        is not finite (outside its domain, or at a pole) are dropped, so the
        result may contain fewer than ``num_points`` rows.

    Raises:
        KeyError: If ``equation`` is not a key of ``test_equations``.
    """
    x = np.linspace(-10, 10, int(num_points))
    # The formulas use "^" for exponentiation; Python spells it "**".
    expression = test_equations[equation].replace("^", "**")

    # NOTE: eval is acceptable here only because `expression` comes from the
    # fixed `test_equations` table above, never from user-typed text. Builtins
    # are disabled and only the whitelisted math names plus `x` are visible.
    with np.errstate(divide="ignore", invalid="ignore"):
        y = eval(
            expression,
            {"__builtins__": {}},
            {**_EQUATION_NAMESPACE, "x": x},
        )

    # log/sqrt of out-of-domain inputs and division by zero yield NaN/inf;
    # discard those samples rather than handing them on as targets.
    finite = np.isfinite(y)
    x, y = x[finite], y[finite]

    noise = np.random.normal(0, noise_level, y.shape)
    y_noisy = y + noise
    return pd.DataFrame({"x": x}), y_noisy
42
+
43
 
44
  def greet(
45
  file_obj: Optional[tempfile._TemporaryFileWrapper],
46
+ test_equation: str,
47
+ num_points: int,
48
+ noise_level: float,
49
  niterations: int,
50
  maxsize: int,
51
  binary_operators: list,
52
  unary_operators: list,
53
  force_run: bool,
54
  ):
55
+ if file_obj is not None:
56
+ if len(binary_operators) == 0 and len(unary_operators) == 0:
57
+ return (
58
+ empty_df,
59
+ "Please select at least one operator!",
60
+ )
61
+ # Look at some statistics of the file:
62
+ df = pd.read_csv(file_obj)
63
+ if len(df) == 0:
64
+ return (
65
+ empty_df,
66
+ "The file is empty!",
67
+ )
68
+ if len(df.columns) == 1:
69
+ return (
70
+ empty_df,
71
+ "The file has only one column!",
72
+ )
73
+ if len(df) > 10_000 and not force_run:
74
+ return (
75
+ empty_df,
76
+ "You have uploaded a file with more than 10,000 rows. "
77
+ "This will take very long to run. "
78
+ "Please upload a subsample of the data, "
79
+ "or check the box 'Ignore Warnings'.",
80
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
 
82
+ col_to_fit = df.columns[-1]
83
+ y = np.array(df[col_to_fit])
84
+ X = df.drop([col_to_fit], axis=1)
85
+ else:
86
+ X, y = generate_data(test_equation, num_points, noise_level)
87
 
88
  model = pysr.PySRRegressor(
89
  bumper=True,
 
122
  description="Symbolic Regression with PySR. Watch search progress by following the logs.",
123
  inputs=[
124
  gr.File(label="Upload a CSV File"),
125
+ gr.Radio(list(test_equations.keys()), label="Test Equation"),
126
+ gr.Slider(
127
+ minimum=10,
128
+ maximum=1000,
129
+ value=100,
130
+ label="Number of Data Points",
131
+ step=1,
132
+ ),
133
+ gr.Slider(minimum=0, maximum=1, value=0.1, label="Noise Level"),
134
  gr.Slider(
135
  minimum=1,
136
  maximum=1000,