import re

import numpy as np
import pandas as pd

TEST_EQUATIONS = ["sin(2*x)/x + 0.1*x"]

# Matches a bare math-function name that is not already qualified (``np.sin``)
# and is not the tail of a longer identifier (``arcsin``). Prefixes such as
# ``sinh`` or ``log10`` still match, so they resolve to ``np.sinh``/``np.log10``.
_BARE_FUNCTION = re.compile(r"(?<![\w.])(sin|cos|exp|log|tan)")


def generate_data(s: str, num_points: int, noise_level: float, data_seed: int):
    """Sample ``x`` uniformly in [-10, 10) and evaluate the equation ``s`` on it.

    Args:
        s: Equation in the variable ``x``. Bare ``sin``/``cos``/``exp``/``log``/
            ``tan`` are mapped to their NumPy counterparts and ``^`` is treated
            as exponentiation.
        num_points: Number of samples to draw.
        noise_level: Standard deviation of the Gaussian noise added to ``y``.
        data_seed: Seed for the ``RandomState`` that draws both ``x`` and noise.

    Returns:
        A tuple ``(X, y_noisy)``: a one-column DataFrame named ``"x"`` and a
        NumPy array holding the noisy equation values.
    """
    rstate = np.random.RandomState(data_seed)
    x = rstate.uniform(-10, 10, num_points)

    # Single pass, so an input that already says ``np.sin`` is left untouched
    # instead of becoming ``np.np.sin``.
    expr = _BARE_FUNCTION.sub(r"np.\1", s).replace("^", "**")

    # NOTE(review): eval runs arbitrary Python. If ``s`` can come from an
    # untrusted source this needs a real expression parser -- confirm callers.
    # The expression resolves ``x`` from this scope and ``np`` from the module.
    result = eval(expr)

    # A constant expression (no ``x``) evaluates to a scalar; broadcast it so
    # there is always one target value per sample.
    y = np.broadcast_to(np.asarray(result), x.shape)

    noise = rstate.normal(0, noise_level, y.shape)
    y_noisy = y + noise
    return pd.DataFrame({"x": x}), y_noisy


def read_csv(file_input: str, force_run: bool) -> tuple[pd.DataFrame, np.ndarray]:
    """Load a CSV and split it into features and a target column.

    The last column is used as the target; every other column is a feature.

    Args:
        file_input: Path (or anything ``pandas.read_csv`` accepts) of the CSV.
        force_run: When true, skip the check that rejects files with more
            than 10,000 rows.

    Returns:
        A tuple ``(X, y)``: the DataFrame without the last column, and the
        last column as a NumPy array.

    Raises:
        ValueError: If the file has no rows, has only one column, or has more
            than 10,000 rows while ``force_run`` is false.
    """
    # Look at some statistics of the file:
    df = pd.read_csv(file_input)
    if len(df) == 0:
        raise ValueError("The file is empty!")
    if len(df.columns) == 1:
        raise ValueError("The file has only one column!")
    if len(df) > 10_000 and not force_run:
        raise ValueError(
            "You have uploaded a file with more than 10,000 rows. "
            "This will take very long to run. "
            "Please upload a subsample of the data, "
            "or check the box 'Ignore Warnings'.",
        )

    col_to_fit = df.columns[-1]
    y = np.array(df[col_to_fit])
    X = df.drop([col_to_fit], axis=1)

    return X, y