Spaces:
Running
Running
AutonLabTruth
committed on
Commit
•
0aafc34
1
Parent(s):
181a454
Refactored till file creation
Browse files- pysr/sr.py +114 -90
pysr/sr.py
CHANGED
@@ -195,7 +195,6 @@ def pysr(X=None, y=None, weights=None,
|
|
195 |
X_filename, dataset_filename, hyperparam_filename, operator_filename, pkg_filename, runfile_filename, tmpdir, \
|
196 |
weights_filename, y_filename = set_paths(tempdir)
|
197 |
|
198 |
-
|
199 |
if isinstance(X, pd.DataFrame):
|
200 |
variable_names = list(X.columns)
|
201 |
X = np.array(X)
|
@@ -231,27 +230,99 @@ def pysr(X=None, y=None, weights=None,
|
|
231 |
#arbitrary complexity by default
|
232 |
handle_constraints(binary_operators, constraints, unary_operators)
|
233 |
|
234 |
-
constraints_str =
|
235 |
-
first = True
|
236 |
-
for op in unary_operators:
|
237 |
-
val = constraints[op]
|
238 |
-
if not first:
|
239 |
-
constraints_str += ", "
|
240 |
-
constraints_str += f"{val:d}"
|
241 |
-
first = False
|
242 |
|
243 |
-
|
244 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
245 |
|
246 |
-
first = True
|
247 |
-
for op in binary_operators:
|
248 |
-
tup = constraints[op]
|
249 |
-
if not first:
|
250 |
-
constraints_str += ", "
|
251 |
-
constraints_str += f"({tup[0]:d}, {tup[1]:d})"
|
252 |
-
first = False
|
253 |
-
constraints_str += "]"
|
254 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
255 |
def_hyperparams += f"""include("{_escape_filename(operator_filename)}")
|
256 |
{constraints_str}
|
257 |
const binops = {'[' + ', '.join(binary_operators) + ']'}
|
@@ -290,7 +361,6 @@ const warmupMaxsize = {warmupMaxsize:d}
|
|
290 |
const limitPowComplexity = {"true" if limitPowComplexity else "false"}
|
291 |
const useFrequency = {"true" if useFrequency else "false"}
|
292 |
"""
|
293 |
-
|
294 |
op_runner = ""
|
295 |
if len(binary_operators) > 0:
|
296 |
op_runner += """
|
@@ -301,14 +371,13 @@ const useFrequency = {"true" if useFrequency else "false"}
|
|
301 |
end"""
|
302 |
for i in range(1, len(binary_operators)):
|
303 |
op_runner += f"""
|
304 |
-
elseif i === {i+1}
|
305 |
@inbounds @simd for j=1:clen
|
306 |
x[j] = {binary_operators[i]}(x[j], y[j])
|
307 |
end"""
|
308 |
op_runner += """
|
309 |
end
|
310 |
end"""
|
311 |
-
|
312 |
if len(unary_operators) > 0:
|
313 |
op_runner += """
|
314 |
@inline function UNAOP!(x::Array{Float32, 1}, i::Int, clen::Int)
|
@@ -318,85 +387,40 @@ end"""
|
|
318 |
end"""
|
319 |
for i in range(1, len(unary_operators)):
|
320 |
op_runner += f"""
|
321 |
-
elseif i === {i+1}
|
322 |
@inbounds @simd for j=1:clen
|
323 |
x[j] = {unary_operators[i]}(x[j])
|
324 |
end"""
|
325 |
op_runner += """
|
326 |
end
|
327 |
end"""
|
328 |
-
|
329 |
def_hyperparams += op_runner
|
330 |
-
|
331 |
-
def_datasets = """using DelimitedFiles"""
|
332 |
-
|
333 |
-
np.savetxt(X_filename, X, delimiter=',')
|
334 |
-
np.savetxt(y_filename, y, delimiter=',')
|
335 |
-
if weights is not None:
|
336 |
-
np.savetxt(weights_filename, weights, delimiter=',')
|
337 |
-
|
338 |
-
def_datasets += f"""
|
339 |
-
const X = readdlm("{_escape_filename(X_filename)}", ',', Float32, '\\n')
|
340 |
-
const y = readdlm("{_escape_filename(y_filename)}", ',', Float32, '\\n')"""
|
341 |
-
|
342 |
-
if weights is not None:
|
343 |
-
def_datasets += f"""
|
344 |
-
const weights = readdlm("{_escape_filename(weights_filename)}", ',', Float32, '\\n')"""
|
345 |
-
|
346 |
if use_custom_variable_names:
|
347 |
def_hyperparams += f"""
|
348 |
-
const varMap = {'["' + '", "'.join(variable_names) + '"]'}"""
|
349 |
-
|
350 |
-
with open(hyperparam_filename, 'w') as f:
|
351 |
-
print(def_hyperparams, file=f)
|
352 |
-
|
353 |
-
with open(dataset_filename, 'w') as f:
|
354 |
-
print(def_datasets, file=f)
|
355 |
-
|
356 |
-
with open(runfile_filename, 'w') as f:
|
357 |
-
print(f'@everywhere include("{_escape_filename(hyperparam_filename)}")', file=f)
|
358 |
-
print(f'@everywhere include("{_escape_filename(dataset_filename)}")', file=f)
|
359 |
-
print(f'@everywhere include("{_escape_filename(pkg_filename)}")', file=f)
|
360 |
-
print(f'fullRun({niterations:d}, npop={npop:d}, ncyclesperiteration={ncyclesperiteration:d}, fractionReplaced={fractionReplaced:f}f0, verbosity=round(Int32, {verbosity:f}), topn={topn:d})', file=f)
|
361 |
-
print(f'rmprocs(nprocs)', file=f)
|
362 |
-
|
363 |
-
|
364 |
-
command = [
|
365 |
-
f'julia', f'-O{julia_optimization:d}',
|
366 |
-
f'-p', f'{procs}',
|
367 |
-
str(runfile_filename),
|
368 |
-
]
|
369 |
-
if timeout is not None:
|
370 |
-
command = [f'timeout', f'{timeout}'] + command
|
371 |
-
|
372 |
-
global global_n_features
|
373 |
-
global global_equation_file
|
374 |
-
global global_variable_names
|
375 |
-
global global_extra_sympy_mappings
|
376 |
-
|
377 |
-
global_n_features = X.shape[1]
|
378 |
-
global_equation_file = equation_file
|
379 |
-
global_variable_names = variable_names
|
380 |
-
global_extra_sympy_mappings = extra_sympy_mappings
|
381 |
-
|
382 |
-
print("Running on", ' '.join(command))
|
383 |
-
process = subprocess.Popen(command, stdout=subprocess.PIPE, bufsize=1, shell=True)
|
384 |
-
try:
|
385 |
-
while True:
|
386 |
-
line = process.stdout.readline()
|
387 |
-
if not line: break
|
388 |
-
print(line.decode('utf-8').replace('\n', ''))
|
389 |
-
|
390 |
-
process.stdout.close()
|
391 |
-
process.wait()
|
392 |
-
except KeyboardInterrupt:
|
393 |
-
print("Killing process... will return when done.")
|
394 |
-
process.kill()
|
395 |
|
396 |
-
if delete_tempfiles:
|
397 |
-
shutil.rmtree(tmpdir)
|
398 |
|
399 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
400 |
|
401 |
|
402 |
def handle_constraints(binary_operators, constraints, unary_operators):
|
|
|
195 |
X_filename, dataset_filename, hyperparam_filename, operator_filename, pkg_filename, runfile_filename, tmpdir, \
|
196 |
weights_filename, y_filename = set_paths(tempdir)
|
197 |
|
|
|
198 |
if isinstance(X, pd.DataFrame):
|
199 |
variable_names = list(X.columns)
|
200 |
X = np.array(X)
|
|
|
230 |
#arbitrary complexity by default
|
231 |
handle_constraints(binary_operators, constraints, unary_operators)
|
232 |
|
233 |
+
constraints_str = make_constraints_str(binary_operators, constraints, unary_operators)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
234 |
|
235 |
+
def_hyperparams = make_hyperparams_julia_str(X, alpha, annealing, batchSize, batching, binary_operators,
|
236 |
+
constraints_str, def_hyperparams, equation_file, fast_cycle,
|
237 |
+
fractionReplacedHof, hofMigration, limitPowComplexity, maxdepth,
|
238 |
+
maxsize, migration, nrestarts, operator_filename, parsimony,
|
239 |
+
perturbationFactor, populations, procs, shouldOptimizeConstants,
|
240 |
+
unary_operators, useFrequency, use_custom_variable_names, variable_names,
|
241 |
+
warmupMaxsize, weightAddNode, weightDeleteNode, weightDoNothing,
|
242 |
+
weightInsertNode, weightMutateConstant, weightMutateOperator,
|
243 |
+
weightRandomize, weightSimplify, weights)
|
244 |
+
|
245 |
+
def_datasets = make_datasets_julia_str(X, X_filename, weights, weights_filename, y, y_filename)
|
246 |
+
|
247 |
+
create_julia_files(dataset_filename, def_datasets, def_hyperparams, fractionReplaced, hyperparam_filename,
|
248 |
+
ncyclesperiteration, niterations, npop, pkg_filename, runfile_filename, topn, verbosity)
|
249 |
+
|
250 |
+
command = [
|
251 |
+
f'julia', f'-O{julia_optimization:d}',
|
252 |
+
f'-p', f'{procs}',
|
253 |
+
str(runfile_filename),
|
254 |
+
]
|
255 |
+
if timeout is not None:
|
256 |
+
command = [f'timeout', f'{timeout}'] + command
|
257 |
+
|
258 |
+
global global_n_features
|
259 |
+
global global_equation_file
|
260 |
+
global global_variable_names
|
261 |
+
global global_extra_sympy_mappings
|
262 |
+
|
263 |
+
global_n_features = X.shape[1]
|
264 |
+
global_equation_file = equation_file
|
265 |
+
global_variable_names = variable_names
|
266 |
+
global_extra_sympy_mappings = extra_sympy_mappings
|
267 |
+
|
268 |
+
print("Running on", ' '.join(command))
|
269 |
+
process = subprocess.Popen(command, stdout=subprocess.PIPE, bufsize=1, shell=True)
|
270 |
+
try:
|
271 |
+
while True:
|
272 |
+
line = process.stdout.readline()
|
273 |
+
if not line: break
|
274 |
+
print(line.decode('utf-8').replace('\n', ''))
|
275 |
+
|
276 |
+
process.stdout.close()
|
277 |
+
process.wait()
|
278 |
+
except KeyboardInterrupt:
|
279 |
+
print("Killing process... will return when done.")
|
280 |
+
process.kill()
|
281 |
+
|
282 |
+
if delete_tempfiles:
|
283 |
+
shutil.rmtree(tmpdir)
|
284 |
+
|
285 |
+
return get_hof()
|
286 |
+
|
287 |
+
|
288 |
+
def create_julia_files(dataset_filename, def_datasets, def_hyperparams, fractionReplaced, hyperparam_filename,
                       ncyclesperiteration, niterations, npop, pkg_filename, runfile_filename, topn, verbosity):
    """Write the Julia hyperparameter, dataset and run files to disk.

    Three files are created (or overwritten):

    * ``hyperparam_filename`` receives ``def_hyperparams``.
    * ``dataset_filename`` receives ``def_datasets``.
    * ``runfile_filename`` receives a script that includes the hyperparameter,
      dataset and ``pkg_filename`` files via ``@everywhere include(...)``,
      then calls ``fullRun(...)`` and finally ``rmprocs(nprocs)``.

    ``niterations``, ``npop``, ``ncyclesperiteration`` and ``topn`` are
    formatted with ``:d`` and must therefore be integers; ``fractionReplaced``
    and ``verbosity`` are formatted with ``:f``.

    Returns:
        None. The function is used only for its file-writing side effects.
    """
    with open(hyperparam_filename, 'w') as f:
        print(def_hyperparams, file=f)

    with open(dataset_filename, 'w') as f:
        print(def_datasets, file=f)

    with open(runfile_filename, 'w') as f:
        # Every include path goes through _escape_filename before being
        # embedded in a Julia string literal.
        print(f'@everywhere include("{_escape_filename(hyperparam_filename)}")', file=f)
        print(f'@everywhere include("{_escape_filename(dataset_filename)}")', file=f)
        print(f'@everywhere include("{_escape_filename(pkg_filename)}")', file=f)
        print(
            f'fullRun({niterations:d}, npop={npop:d}, ncyclesperiteration={ncyclesperiteration:d}, fractionReplaced={fractionReplaced:f}f0, verbosity=round(Int32, {verbosity:f}), topn={topn:d})',
            file=f)
        # Plain string: there is nothing to interpolate on this line.
        print('rmprocs(nprocs)', file=f)
|
302 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
303 |
|
304 |
+
def make_datasets_julia_str(X, X_filename, weights, weights_filename, y, y_filename):
    """Save the dataset to CSV files and return Julia code that loads it.

    ``X`` and ``y`` are written with ``np.savetxt`` (comma-delimited) to
    ``X_filename`` and ``y_filename``. When ``weights`` is not ``None`` it is
    written to ``weights_filename`` in the same way.

    Returns:
        A Julia source string that starts with ``using DelimitedFiles`` and
        defines ``const X`` and ``const y`` via ``readdlm`` (plus
        ``const weights`` when weights were supplied).
    """
    # Write all data files first, then build the loader source.
    np.savetxt(X_filename, X, delimiter=',')
    np.savetxt(y_filename, y, delimiter=',')
    if weights is not None:
        np.savetxt(weights_filename, weights, delimiter=',')

    def_datasets = """using DelimitedFiles"""
    # '\\n' below produces the two characters \n in the Julia source, i.e. the
    # Julia character literal for the end-of-line delimiter.
    def_datasets += f"""
const X = readdlm("{_escape_filename(X_filename)}", ',', Float32, '\\n')
const y = readdlm("{_escape_filename(y_filename)}", ',', Float32, '\\n')"""
    if weights is not None:
        def_datasets += f"""
const weights = readdlm("{_escape_filename(weights_filename)}", ',', Float32, '\\n')"""
    return def_datasets
|
317 |
+
|
318 |
+
|
319 |
+
def make_hyperparams_julia_str(X, alpha, annealing, batchSize, batching, binary_operators, constraints_str,
|
320 |
+
def_hyperparams, equation_file, fast_cycle, fractionReplacedHof, hofMigration,
|
321 |
+
limitPowComplexity, maxdepth, maxsize, migration, nrestarts, operator_filename,
|
322 |
+
parsimony, perturbationFactor, populations, procs, shouldOptimizeConstants,
|
323 |
+
unary_operators, useFrequency, use_custom_variable_names, variable_names, warmupMaxsize, weightAddNode,
|
324 |
+
weightDeleteNode, weightDoNothing, weightInsertNode, weightMutateConstant,
|
325 |
+
weightMutateOperator, weightRandomize, weightSimplify, weights):
|
326 |
def_hyperparams += f"""include("{_escape_filename(operator_filename)}")
|
327 |
{constraints_str}
|
328 |
const binops = {'[' + ', '.join(binary_operators) + ']'}
|
|
|
361 |
const limitPowComplexity = {"true" if limitPowComplexity else "false"}
|
362 |
const useFrequency = {"true" if useFrequency else "false"}
|
363 |
"""
|
|
|
364 |
op_runner = ""
|
365 |
if len(binary_operators) > 0:
|
366 |
op_runner += """
|
|
|
371 |
end"""
|
372 |
for i in range(1, len(binary_operators)):
|
373 |
op_runner += f"""
|
374 |
+
elseif i === {i + 1}
|
375 |
@inbounds @simd for j=1:clen
|
376 |
x[j] = {binary_operators[i]}(x[j], y[j])
|
377 |
end"""
|
378 |
op_runner += """
|
379 |
end
|
380 |
end"""
|
|
|
381 |
if len(unary_operators) > 0:
|
382 |
op_runner += """
|
383 |
@inline function UNAOP!(x::Array{Float32, 1}, i::Int, clen::Int)
|
|
|
387 |
end"""
|
388 |
for i in range(1, len(unary_operators)):
|
389 |
op_runner += f"""
|
390 |
+
elseif i === {i + 1}
|
391 |
@inbounds @simd for j=1:clen
|
392 |
x[j] = {unary_operators[i]}(x[j])
|
393 |
end"""
|
394 |
op_runner += """
|
395 |
end
|
396 |
end"""
|
|
|
397 |
def_hyperparams += op_runner
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
398 |
if use_custom_variable_names:
|
399 |
def_hyperparams += f"""
|
400 |
+
const varMap = {'["' + '", "'.join(variable_names) + '"]'}"""
|
401 |
+
return def_hyperparams
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
402 |
|
|
|
|
|
403 |
|
404 |
+
def make_constraints_str(binary_operators, constraints, unary_operators):
    """Build the Julia source declaring the operator constraint arrays.

    Args:
        binary_operators: Iterable of binary operator names; each must be a
            key of ``constraints`` mapping to an indexable pair of integers.
        constraints: Mapping from operator name to its constraint value.
        unary_operators: Iterable of unary operator names; each must be a key
            of ``constraints`` mapping to a single integer.

    Returns:
        A two-line string of the form::

            const una_constraints = [v1, v2, ...]
            const bin_constraints = [(a1, b1), (a2, b2), ...]

        with entries in the same order as the operator sequences.

    Raises:
        KeyError: If an operator has no entry in ``constraints``.
        ValueError: If a constraint value cannot be formatted with ``:d``.
    """
    # The ':d' format spec is kept so non-integer constraints fail loudly.
    una_items = ", ".join(f"{constraints[op]:d}" for op in unary_operators)
    bin_items = ", ".join(
        f"({constraints[op][0]:d}, {constraints[op][1]:d})" for op in binary_operators
    )
    return (
        f"const una_constraints = [{una_items}]\n"
        f"const bin_constraints = [{bin_items}]"
    )
|
424 |
|
425 |
|
426 |
def handle_constraints(binary_operators, constraints, unary_operators):
|