File size: 3,911 Bytes
ac2928c
7e72d81
271a473
ac2928c
 
 
 
 
 
271a473
 
ac2928c
 
271a473
ac2928c
 
271a473
c88fbe0
ac2928c
 
 
 
 
 
 
 
 
 
 
 
 
271a473
ac2928c
271a473
c88fbe0
ac2928c
 
 
 
 
 
 
 
 
271a473
 
ac2928c
 
 
 
 
 
 
 
 
 
c88fbe0
ac2928c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
271a473
ac2928c
 
 
 
 
 
271a473
 
 
 
e2a7e95
271a473
e2a7e95
271a473
e2a7e95
 
 
271a473
ac2928c
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import numpy as np
import csv
import traceback


class Problem:
    """
    Minimal problem container meant to be handed to PySR.

    Attributes:
        X, y: the data, in whatever form pysr accepts
        form: a string representing the correct equation (None if not given)
        variable_names: the names of the variables (None if not given)

    Intended usage is
    pysr(problem.X, problem.y, variable_names=problem.variable_names)
    """
    def __init__(self, X, y, form=None, variable_names=None):
        # Plain storage only; nothing is validated or copied.
        self.X, self.y = X, y
        self.form, self.variable_names = form, variable_names


class FeynmanProblem(Problem):
    """
    Stores the data for the problems from the 100 Feynman Equations on Physics.
    This is the benchmark used in the AI Feynman Paper

    Attributes set here in addition to X, y, form and variable_names:
        eq_id: the row's 'Filename' value
        n_vars: the row's '# variables' value as an int
        low, high: per-variable floats read from the v{i}_low / v{i}_high columns
        dp: number of data points generated when gen is true
    """
    def __init__(self, row, gen=False, dp=500):
        """
        row: a row read as a dict from the FeynmanEquations dataset provided in the datasets folder of the repo
        gen: If true the problem will have dp X and y values randomly generated else they will be None
        dp: number of rows of X (and entries of y) to generate when gen is true
        """
        self.eq_id = row['Filename']
        self.n_vars = int(row['# variables'])
        # Dataset columns are numbered from 1: v1_name, v1_low, v1_high, ...
        columns = range(1, self.n_vars + 1)
        super().__init__(None, None, form=row['Formula'],
                         variable_names=[row[f'v{i}_name'] for i in columns])
        self.low = [float(row[f'v{i}_low']) for i in columns]
        self.high = [float(row[f'v{i}_high']) for i in columns]
        self.dp = dp
        if gen:
            self._generate_data()

    def _generate_data(self):
        """Fill X with random samples and y with the formula evaluated on them."""
        # NOTE(review): the sampling range is fixed at [0.01, 25) and does not
        # use self.low / self.high -- confirm this is intended.
        self.X = np.random.uniform(0.01, 25, size=(self.dp, self.n_vars))
        # Each variable name maps to its column of X ...
        namespace = {name: self.X[:, col]
                     for col, name in enumerate(self.variable_names)}
        # ... and the math names are added afterwards, so they take precedence
        # over a variable that happens to share one of these names.
        namespace.update({
            'exp': np.exp,
            'sqrt': np.sqrt,
            'pi': np.pi,
            'cos': np.cos,
            'sin': np.sin,
            'tan': np.tan,
            'tanh': np.tanh,
            'ln': np.log,
            'log': np.log,  # Quite sure the Feynman dataset has no base 10 logs
            'arcsin': np.arcsin,
        })
        # WARNING: eval executes the formula text as Python code. Only use this
        # with a trusted dataset file; never feed it untrusted input.
        self.y = eval(self.form, namespace)

    def __str__(self):
        return f"Feynman Equation: {self.eq_id}|Form: {self.form}"

    def __repr__(self):
        return str(self)

    @staticmethod
    def mk_problems(first=100, gen=False, dp=500, data_dir="datasets/FeynmanEquations.csv"):
        """
        Build FeynmanProblems from the rows of the Feynman Equations csv.

        first: at most this many rows with a non-empty Filename are turned into
               problems; a row that fails still counts towards this limit
        gen: forwarded to FeynmanProblem
        dp: forwarded to FeynmanProblem
        data_dir: the path pointing to the Feynman Equations csv
        returns: list of FeynmanProblems (rows that failed are reported and left out)
        """
        ret = []
        # newline='' is what the csv module asks for when reading from a file.
        with open(data_dir, newline='') as csvfile:
            attempted = 0
            for i, row in enumerate(csv.DictReader(csvfile)):
                if attempted >= first:
                    break
                if row['Filename'] == '':
                    # Rows without a filename are skipped and not counted.
                    continue
                attempted += 1
                try:
                    ret.append(FeynmanProblem(row, gen=gen, dp=dp))
                except Exception:
                    # Best effort: report the bad row and carry on with the rest.
                    traceback.print_exc()
                    print(f"FAILED ON ROW {i}")
        return ret


def run_on_problem(problem, verbosity=0):
    """
    Runs pysr on a problem and measures how long the call takes.

    problem: object providing X, y, variable_names and form attributes
    verbosity: forwarded to pysr
    returns: a tuple (best(equations), problem.form, others) where others is a
             dict holding the full pysr result under "equations" and the
             elapsed seconds under "time"
    """
    # perf_counter is monotonic, so the measured duration cannot be distorted
    # by system clock adjustments the way a wall-clock difference can.
    from time import perf_counter
    from . import pysr, best
    starting = perf_counter()
    equations = pysr(problem.X, problem.y, variable_names=problem.variable_names, verbosity=verbosity)
    timing = perf_counter() - starting
    others = {"equations": equations, "time": timing}
    return best(equations), problem.form, others

if __name__ == "__main__":
    # Script entry point: build the problems with generated data and print them.
    problems = FeynmanProblem.mk_problems(gen=True, first=100)
    print(problems)