Spaces:
Running
Running
File size: 5,822 Bytes
ac2928c 7e72d81 271a473 f2e0980 d974a2c 925fb38 d974a2c ac2928c 7d4300a ac2928c 271a473 ac2928c 7d4300a 271a473 ac2928c 271a473 c88fbe0 ac2928c 7d4300a ac2928c 7d4300a ac2928c 271a473 7d4300a ac2928c 6fc6d39 ac2928c b6ed59b 271a473 e2a7e95 7d4300a e2a7e95 7d4300a b6ed59b 7d4300a b6ed59b 271a473 7d4300a b6ed59b 7d4300a 6fc6d39 b6ed59b 925fb38 b6ed59b 7d4300a b6ed59b 7d4300a b6ed59b 7d4300a 59765a8 b6ed59b 6fc6d39 59765a8 7d4300a 59765a8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 |
import numpy as np
import csv
import traceback
from .sr import pysr, best
from pathlib import Path
from functools import partial
PKG_DIR = Path(__file__).parents[1]
FEYNMAN_DATASET = PKG_DIR / "datasets" / "FeynmanEquations.csv"
class Problem:
    """
    Plain container describing one regression problem for PySR.

    Attributes:
        X, y: the data, in whatever form pysr accepts
        form: string holding the correct equation (may be None)
        variable_names: names of the variables, passed to pysr as `variable_names`

    Intended use: pysr(problem.X, problem.y, variable_names=problem.variable_names)
    """

    def __init__(self, X, y, form=None, variable_names=None):
        # Nothing is validated or copied; the arguments are stored as given.
        self.X, self.y = X, y
        self.form, self.variable_names = form, variable_names
class FeynmanProblem(Problem):
    """
    A single problem taken from the 100 Feynman Equations on Physics,
    the benchmark used in the AI Feynman Paper.
    """

    def __init__(self, row, gen=False, dp=500):
        """
        row: a row read as a dict from the FeynmanEquations dataset; the keys read here are
            "Filename", "# variables", "Formula" and, for each variable k (1-based),
            "v<k>_name", "v<k>_low" and "v<k>_high"
        gen: if true, X gets dp randomly generated rows and y is computed from the formula;
            otherwise X and y stay None
        dp: number of data points to generate when gen is true
        """
        self.eq_id = row["Filename"]
        self.n_vars = int(row["# variables"])
        # Dataset columns are numbered from 1.
        var_numbers = range(1, self.n_vars + 1)
        formula = row["Formula"]
        names = [row[f"v{k}_name"] for k in var_numbers]
        super().__init__(None, None, form=formula, variable_names=names)
        self.low = [float(row[f"v{k}_low"]) for k in var_numbers]
        self.high = [float(row[f"v{k}_high"]) for k in var_numbers]
        self.dp = dp
        if not gen:
            return

        # Every variable is sampled from the same fixed range.
        # NOTE(review): self.low / self.high are stored above but not used for sampling - confirm intended.
        self.X = np.random.uniform(0.01, 25, size=(self.dp, self.n_vars))

        # Names visible to the formula: one column of X per variable, then the math helpers
        # (a helper replaces a variable that happens to have the same name).
        namespace = {
            name: self.X[:, col] for col, name in enumerate(self.variable_names)
        }
        namespace.update(
            exp=np.exp,
            sqrt=np.sqrt,
            pi=np.pi,
            cos=np.cos,
            sin=np.sin,
            tan=np.tan,
            tanh=np.tanh,
            ln=np.log,
            log=np.log,  # Quite sure the Feynman dataset has no base 10 logs
            arcsin=np.arcsin,
        )
        # NOTE(review): the formula text comes straight from the csv row and is run with eval;
        # only use dataset files you trust.
        self.y = eval(self.form, namespace)

    def __str__(self):
        """Short description made of the equation id and its formula."""
        return f"Feynman Equation: {self.eq_id}|Form: {self.form}"

    def __repr__(self):
        """Same text as __str__."""
        return str(self)
def mk_problems(first=100, gen=False, dp=500, data_dir=FEYNMAN_DATASET):
    """
    Build FeynmanProblems from the leading rows of the Feynman Equations csv.

    first: at most "first" equations from the dataset will be made into problems.
        Rows with an empty "Filename" are skipped and not counted; rows that fail
        to build are reported, left out of the result, and still counted.
    gen: passed on to FeynmanProblem
    dp: passed on to FeynmanProblem
    data_dir: the path pointing to the Feynman Equations csv
    returns: list of FeynmanProblems
    """
    ret = []
    # newline="" is what the csv module asks for when it is handed a file object.
    with open(data_dir, newline="") as csvfile:
        attempted = 0
        reader = csv.DictReader(csvfile)
        for i, row in enumerate(reader):
            # ">=" rather than ">": stop after exactly `first` attempted rows,
            # not `first + 1`.
            if attempted >= first:
                break
            if row["Filename"] == "":
                continue
            try:
                ret.append(FeynmanProblem(row, gen=gen, dp=dp))
            except Exception as e:
                # Best effort: one bad row must not abort the whole load.
                traceback.print_exc()
                print(f"FAILED ON ROW {i} with {e}")
            attempted += 1
    return ret
def run_on_problem(problem, verbosity=0, multiprocessing=True):
    """
    Runs pysr on one problem and measures how long the call takes.

    Returns a tuple: (best predicted equation as a string, actual equation, others)
    where others is a dict holding "time" (seconds spent in pysr), "problem" and,
    only when multiprocessing is false, "equations" (everything pysr returned).
    """
    import time

    began = time.time()
    equations = pysr(
        problem.X,
        problem.y,
        variable_names=problem.variable_names,
        verbosity=verbosity,
    )
    elapsed = time.time() - began

    others = dict(time=elapsed, problem=problem)
    if multiprocessing:
        return str(best(equations)), problem.form, others
    others["equations"] = equations
    return str(best(equations)), problem.form, others
def do_feynman_experiments_parallel(
    first=100,
    verbosity=0,
    dp=500,
    output_file_path="FeynmanExperiment.csv",
    data_dir=FEYNMAN_DATASET,
):
    """
    Runs pysr on the Feynman problems in a multiprocessing pool and appends the results to a csv.

    first: passed to mk_problems to limit how many equations are used
    verbosity: passed to pysr through run_on_problem
    dp: number of data points generated per problem
    output_file_path: csv file the results are appended to
    data_dir: the path pointing to the Feynman Equations csv
    returns: None; the output file gets a header row followed by one row per problem
    """
    import multiprocessing as mp
    from tqdm import tqdm

    problems = mk_problems(first=first, gen=True, dp=dp, data_dir=data_dir)
    worker = partial(run_on_problem, verbosity=verbosity)
    results = []
    # The pool is used as a context manager so its worker processes are shut
    # down once all results are in (or if a run raises).
    with mp.Pool() as pool, tqdm(total=len(problems)) as pbar:
        # imap yields results in the same order as `problems`.
        for res in pool.imap(worker, problems):
            results.append(res)
            pbar.update()
    # newline="" keeps the csv writer from emitting blank lines on platforms
    # that translate "\n" to "\r\n".
    # NOTE: the file is opened in append mode, so the header row is written on every call.
    with open(output_file_path, "a", newline="") as csvfile:
        writer = csv.writer(csvfile, delimiter=",")
        writer.writerow(["ID", "Predicted", "True", "Time"])
        for prediction, true_equation, others in results:
            writer.writerow(
                [others["problem"].eq_id, prediction, true_equation, others["time"]]
            )
def do_feynman_experiments(
    first=100,
    verbosity=0,
    dp=500,
    output_file_path="FeynmanExperiment.csv",
    data_dir=FEYNMAN_DATASET,
):
    """
    Runs pysr on the Feynman problems one after another and appends the results to a csv.

    first: passed to mk_problems to limit how many equations are used
    verbosity: passed to pysr through run_on_problem
    dp: number of data points generated per problem
    output_file_path: csv file the results are appended to
    data_dir: the path pointing to the Feynman Equations csv
    returns: None; the output file gets a header row followed by one row per problem
    """
    from tqdm import tqdm

    problems = mk_problems(first=first, gen=True, dp=dp, data_dir=data_dir)
    rows = []
    for problem in tqdm(problems):
        prediction, true_equation, others = run_on_problem(problem, verbosity)
        rows.append([problem.eq_id, prediction, true_equation, others["time"]])
    # newline="" keeps the csv writer from emitting blank lines on platforms
    # that translate "\n" to "\r\n".
    # NOTE: the file is opened in append mode, so the header row is written on every call.
    with open(output_file_path, "a", newline="") as csvfile:
        writer = csv.writer(csvfile, delimiter=",")
        writer.writerow(["ID", "Predicted", "True", "Time"])
        writer.writerows(rows)
|