MilesCranmer committed on
Commit
2ca2654
1 Parent(s): 3a557a9

Add parameter for batching

Browse files
Files changed (2) hide show
  1. julia/sr.jl +26 -4
  2. pysr/sr.py +8 -1
julia/sr.jl CHANGED
@@ -616,8 +616,11 @@ function iterate(member::PopMember, T::Float32)::PopMember
616
  prev = member.tree
617
  tree = copyNode(prev)
618
  #TODO - reconsider this
619
- # beforeLoss = member.score
620
- beforeLoss = scoreFuncBatch(member.tree)
 
 
 
621
 
622
  mutationChoice = rand()
623
  weightAdjustmentMutateConstant = min(8, countConstants(tree))/8.0
@@ -648,7 +651,11 @@ function iterate(member::PopMember, T::Float32)::PopMember
648
  return PopMember(tree, beforeLoss)
649
  end
650
 
651
- afterLoss = scoreFuncBatch(tree)
 
 
 
 
652
 
653
  if annealing
654
  delta = afterLoss - beforeLoss
@@ -697,6 +704,16 @@ function bestOfSample(pop::Population)::PopMember
697
  return sample.members[best_idx]
698
  end
699
 
 
 
 
 
 
 
 
 
 
 
700
  # Return best 10 examples
701
  function bestSubPop(pop::Population; topn::Integer=10)::Population
702
  best_idx = sortperm([pop.members[member].score for member=1:pop.n])
@@ -1000,7 +1017,7 @@ function fullRun(niterations::Integer;
1000
  @async begin
1001
  allPops[i] = @spawnat :any let
1002
  tmp_pop = run(cur_pop, ncyclesperiteration, verbosity=verbosity)
1003
- for j=1:tmp_pop.n
1004
  if rand() < 0.1
1005
  tmp_pop.members[j].tree = simplifyTree(tmp_pop.members[j].tree)
1006
  tmp_pop.members[j].tree = combineOperators(tmp_pop.members[j].tree)
@@ -1009,6 +1026,11 @@ function fullRun(niterations::Integer;
1009
  end
1010
  end
1011
  end
 
 
 
 
 
1012
  tmp_pop
1013
  end
1014
  put!(channels[i], fetch(allPops[i]))
 
616
  prev = member.tree
617
  tree = copyNode(prev)
618
  #TODO - reconsider this
619
+ if batching
620
+ beforeLoss = scoreFuncBatch(member.tree)
621
+ else
622
+ beforeLoss = member.score
623
+ end
624
 
625
  mutationChoice = rand()
626
  weightAdjustmentMutateConstant = min(8, countConstants(tree))/8.0
 
651
  return PopMember(tree, beforeLoss)
652
  end
653
 
654
+ if batching
655
+ afterLoss = scoreFuncBatch(tree)
656
+ else
657
+ afterLoss = scoreFunc(tree)
658
+ end
659
 
660
  if annealing
661
  delta = afterLoss - beforeLoss
 
704
  return sample.members[best_idx]
705
  end
706
 
707
+ function finalizeScores(pop::Population)::Population
708
+ need_recalculate = batching
709
+ if need_recalculate
710
+ @inbounds @simd for member=1:pop.n
711
+ pop.members[member].score = scoreFunc(pop.members[member].tree)
712
+ end
713
+ end
714
+ return pop
715
+ end
716
+
717
  # Return best 10 examples
718
  function bestSubPop(pop::Population; topn::Integer=10)::Population
719
  best_idx = sortperm([pop.members[member].score for member=1:pop.n])
 
1017
  @async begin
1018
  allPops[i] = @spawnat :any let
1019
  tmp_pop = run(cur_pop, ncyclesperiteration, verbosity=verbosity)
1020
+ @inbounds @simd for j=1:tmp_pop.n
1021
  if rand() < 0.1
1022
  tmp_pop.members[j].tree = simplifyTree(tmp_pop.members[j].tree)
1023
  tmp_pop.members[j].tree = combineOperators(tmp_pop.members[j].tree)
 
1026
  end
1027
  end
1028
  end
1029
+ if shouldOptimizeConstants
1030
+ #pass #(We already calculate full scores in the optimizer)
1031
+ else
1032
+ tmp_pop = finalizeScores(tmp_pop)
1033
+ end
1034
  tmp_pop
1035
  end
1036
  put!(channels[i], fetch(allPops[i]))
pysr/sr.py CHANGED
@@ -76,6 +76,8 @@ def pysr(X=None, y=None, weights=None,
76
  fast_cycle=False,
77
  maxdepth=None,
78
  variable_names=[],
 
 
79
  threads=None, #deprecated
80
  julia_optimization=3,
81
  ):
@@ -138,6 +140,10 @@ def pysr(X=None, y=None, weights=None,
138
  15% faster. May be algorithmically less efficient.
139
  :param variable_names: list, a list of names for the variables, other
140
  than "x0", "x1", etc.
 
 
 
 
141
  :param julia_optimization: int, Optimization level (0, 1, 2, 3)
142
  :returns: pd.DataFrame, Results dataframe, giving complexity, MSE, and equations
143
  (as strings).
@@ -227,7 +233,8 @@ const nrestarts = {nrestarts:d}
227
  const perturbationFactor = {perturbationFactor:f}f0
228
  const annealing = {"true" if annealing else "false"}
229
  const weighted = {"true" if weights is not None else "false"}
230
- const batchSize = {min([50, len(X)]):d}
 
231
  const useVarMap = {"false" if len(variable_names) == 0 else "true"}
232
  const mutationWeights = [
233
  {weightMutateConstant:f},
 
76
  fast_cycle=False,
77
  maxdepth=None,
78
  variable_names=[],
79
+ batching=False,
80
+ batchSize=50,
81
  threads=None, #deprecated
82
  julia_optimization=3,
83
  ):
 
140
  15% faster. May be algorithmically less efficient.
141
  :param variable_names: list, a list of names for the variables, other
142
  than "x0", "x1", etc.
143
+ :param batching: bool, whether to compare population members on small batches
144
+ during evolution. Still uses full dataset for comparing against
145
+ hall of fame.
146
+ :param batchSize: int, the amount of data to use if doing batching.
147
  :param julia_optimization: int, Optimization level (0, 1, 2, 3)
148
  :returns: pd.DataFrame, Results dataframe, giving complexity, MSE, and equations
149
  (as strings).
 
233
  const perturbationFactor = {perturbationFactor:f}f0
234
  const annealing = {"true" if annealing else "false"}
235
  const weighted = {"true" if weights is not None else "false"}
236
+ const batching = {"true" if batching else "false"}
237
+ const batchSize = {min([batchSize, len(X)]) if batching else len(X):d}
238
  const useVarMap = {"false" if len(variable_names) == 0 else "true"}
239
  const mutationWeights = [
240
  {weightMutateConstant:f},